in optimum/habana/transformers/training_args.py [0:0]
def __post_init__(self):
if self.use_hpu_graphs:
warnings.warn(
(
"`--use_hpu_graphs` is deprecated and will be removed in a future version of 🤗 Optimum Habana. Use `--use_hpu_graphs_for_training` or `--use_hpu_graphs_for_inference` instead."
),
FutureWarning,
)
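# Any one of the three flags implies HPU graphs; the deprecated `use_hpu_graphs`
# is folded in so the checks below also cover legacy invocations.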
use_hpu_graphs = self.use_hpu_graphs or self.use_hpu_graphs_for_inference or self.use_hpu_graphs_for_training
if (self.use_lazy_mode or use_hpu_graphs or self.gaudi_config_name) and not self.use_habana:
raise ValueError(
"`--use_lazy_mode`, `--use_hpu_graphs_for_inference`, `--use_hpu_graphs_for_training` and `--gaudi_config_name` cannot be used without `--use_habana`."
)
if use_hpu_graphs and (not self.use_lazy_mode and not self.torch_compile_backend):
raise ValueError(
"`--use_hpu_graphs_for_inference` and `--use_hpu_graphs_for_training` cannot be used in eager mode. Please set `--use_lazy_mode` to True."
)
if self.distribution_strategy not in SUPPORTED_DISTRIBUTION_STRATEGIES:
raise ValueError(
f"`--distribution_strategy` is {self.distribution_strategy} which is an invalid or unsupported value. Possible choices are: {', '.join(SUPPORTED_DISTRIBUTION_STRATEGIES)}."
)
if self.disable_tensor_cache_hpu_graphs and not use_hpu_graphs:
raise ValueError("must be using hpu graphs to set disable_tensor_cache_hpu_graphs.")
if self.max_hpu_graphs is not None and not use_hpu_graphs:
raise ValueError("must be using hpu graphs to set max_hpu_graphs.")
# Raise errors for arguments that are not supported by optimum-habana
if self.fp16 or self.fp16_full_eval:
raise ValueError(
"--fp16, --fp16_backend, --fp16_full_eval and --fp16_opt_level are not"
" supported by optimum-habana. Mixed-precision can be enabled in your Gaudi configuration."
)
if self.tpu_num_cores or self.tpu_metrics_debug:
raise ValueError("TPUs are not supported by optimum-habana.")
if self.mp_parameters:
raise ValueError("--mp_parameters is not supported by optimum-habana.")
if self.tf32:
raise ValueError("--tf32 is not supported by optimum-habana.")
if self.throughput_warmup_steps < 0:
raise ValueError("--throughput_warmup_steps must be positive.")
# Set default output_dir if not provided
if self.output_dir is None:
self.output_dir = "trainer_output"
logger.info(
"No output directory specified, defaulting to 'trainer_output'. "
"To change this behavior, specify --output_dir when creating TrainingArguments."
)
# Parse in args that could be `dict` sent in from the CLI as a string
for field in self._VALID_DICT_FIELDS:
passed_value = getattr(self, field)
# We only want to do this if the str starts with a bracket to indicate a `dict`
# else it's likely a filename, if supported
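# Illustrative example (not part of the original code): a dict-valued field
# (e.g. `gradient_checkpointing_kwargs`, assuming it appears in `_VALID_DICT_FIELDS`)
# passed on the CLI as '{"use_reentrant": false}' arrives here as that string and
# is parsed into {"use_reentrant": False}, while a plain path such as
# "ds_config.json" is left as a string for downstream file loading.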
if isinstance(passed_value, str) and passed_value.startswith("{"):
loaded_dict = json.loads(passed_value)
# Convert str values to types if applicable
loaded_dict = _convert_str_dict(loaded_dict)
setattr(self, field, loaded_dict)
# Expand paths; otherwise os.makedirs("~/bar") would create the directory
# in the current working directory instead of the actual home
# see https://github.com/huggingface/transformers/issues/10628
if self.output_dir is not None:
self.output_dir = os.path.expanduser(self.output_dir)
if self.logging_dir is None and self.output_dir is not None:
self.logging_dir = os.path.join(self.output_dir, default_logdir())
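# For reference, transformers' `default_logdir()` returns a run-specific
# subfolder like "runs/Jun01_12-00-00_<hostname>", so an unset logging_dir
# resolves to "<output_dir>/runs/<timestamp>_<hostname>".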
if self.logging_dir is not None:
self.logging_dir = os.path.expanduser(self.logging_dir)
if self.disable_tqdm is None:
self.disable_tqdm = logger.getEffectiveLevel() > logging.WARN
if isinstance(self.eval_strategy, EvaluationStrategy):
warnings.warn(
"using `EvaluationStrategy` for `eval_strategy` is deprecated and will be removed in version 5"
" of 🤗 Transformers. Use `IntervalStrategy` instead",
FutureWarning,
)
# Go back to the underlying string or we won't be able to instantiate `IntervalStrategy` on it.
self.eval_strategy = self.eval_strategy.value
self.eval_strategy = IntervalStrategy(self.eval_strategy)
self.logging_strategy = IntervalStrategy(self.logging_strategy)
self.save_strategy = SaveStrategy(self.save_strategy)
self.hub_strategy = HubStrategy(self.hub_strategy)
self.lr_scheduler_type = SchedulerType(self.lr_scheduler_type)
if self.do_eval is False and self.eval_strategy != IntervalStrategy.NO:
self.do_eval = True
if self.torch_empty_cache_steps is not None:
if not (isinstance(self.torch_empty_cache_steps, int) and self.torch_empty_cache_steps > 0):
raise ValueError(
f"`torch_empty_cache_steps` must be an integer bigger than 0, got {self.torch_empty_cache_steps}."
)
# eval_steps has to be defined and non-zero, falling back to logging_steps if the latter is non-zero
if self.eval_strategy == IntervalStrategy.STEPS and (self.eval_steps is None or self.eval_steps == 0):
if self.logging_steps > 0:
logger.info(f"using `logging_steps` to initialize `eval_steps` to {self.logging_steps}")
self.eval_steps = self.logging_steps
else:
raise ValueError(
f"evaluation strategy {self.eval_strategy} requires either non-zero --eval_steps or"
" --logging_steps"
)
# logging_steps must be non-zero when logging_strategy is anything other than 'no'
if self.logging_strategy == IntervalStrategy.STEPS and self.logging_steps == 0:
raise ValueError(f"logging strategy {self.logging_strategy} requires non-zero --logging_steps")
if self.logging_strategy == IntervalStrategy.STEPS and self.logging_steps > 1:
if self.logging_steps != int(self.logging_steps):
raise ValueError(f"--logging_steps must be an integer if bigger than 1: {self.logging_steps}")
self.logging_steps = int(self.logging_steps)
if self.eval_strategy == IntervalStrategy.STEPS and self.eval_steps > 1:
if self.eval_steps != int(self.eval_steps):
raise ValueError(f"--eval_steps must be an integer if bigger than 1: {self.eval_steps}")
self.eval_steps = int(self.eval_steps)
if self.save_strategy == SaveStrategy.STEPS and self.save_steps > 1:
if self.save_steps != int(self.save_steps):
raise ValueError(f"--save_steps must be an integer if bigger than 1: {self.save_steps}")
self.save_steps = int(self.save_steps)
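# Note: values in (0, 1) are interpreted downstream as a ratio of total training
# steps (e.g. eval_steps=0.1 evaluates every 10% of training), which is why
# fractional values are only rejected above when they exceed 1.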
# Sanity checks for load_best_model_at_end: we require save and eval strategies to be compatible.
if self.load_best_model_at_end and self.save_strategy != SaveStrategy.BEST:
if self.eval_strategy != self.save_strategy:
raise ValueError(
"--load_best_model_at_end requires the save and eval strategy to match, but found\n- Evaluation "
f"strategy: {self.eval_strategy}\n- Save strategy: {self.save_strategy}"
)
if self.eval_strategy == IntervalStrategy.STEPS and self.save_steps % self.eval_steps != 0:
if self.eval_steps < 1 or self.save_steps < 1:
if not (self.eval_steps < 1 and self.save_steps < 1):
raise ValueError(
"--load_best_model_at_end requires the saving steps to be a multiple of the evaluation "
"steps, which cannot get guaranteed when mixing ratio and absolute steps for save_steps "
f"{self.save_steps} and eval_steps {self.eval_steps}."
)
# Work around floating point precision issues
LARGE_MULTIPLIER = 1_000_000
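# Float modulo on step ratios is unreliable, e.g. 0.2 % 0.1 evaluates to
# ~0.0999... rather than 0 in IEEE-754 doubles; scaling both operands by a large
# constant before the modulo makes the multiple check more robust.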
if (self.save_steps * LARGE_MULTIPLIER) % (self.eval_steps * LARGE_MULTIPLIER) != 0:
raise ValueError(
"--load_best_model_at_end requires the saving steps to be a multiple of the evaluation "
f"steps, but found {self.save_steps}, which is not a multiple of {self.eval_steps}."
)
else:
raise ValueError(
"--load_best_model_at_end requires the saving steps to be a round multiple of the evaluation "
f"steps, but found {self.save_steps}, which is not a round multiple of {self.eval_steps}."
)
safetensors_available = is_safetensors_available()
if self.save_safetensors and not safetensors_available:
raise ValueError(f"--save_safetensors={self.save_safetensors} requires safetensors to be installed!")
if not self.save_safetensors and safetensors_available:
logger.info(
f"Found safetensors installation, but --save_safetensors={self.save_safetensors}. "
f"Safetensors should be a preferred weights saving format due to security and performance reasons. "
f"If your model cannot be saved by safetensors please feel free to open an issue at "
f"https://github.com/huggingface/safetensors!"
)
if (
self.load_best_model_at_end or self.lr_scheduler_type == SchedulerType.REDUCE_ON_PLATEAU
) and self.metric_for_best_model is None:
self.metric_for_best_model = "loss"
if self.greater_is_better is None and self.metric_for_best_model is not None:
self.greater_is_better = not (self.metric_for_best_model.endswith("loss"))
if self.run_name is None:
self.run_name = self.output_dir
if self.lr_scheduler_type == SchedulerType.REDUCE_ON_PLATEAU:
if self.eval_strategy == IntervalStrategy.NO:
raise ValueError("lr_scheduler_type reduce_lr_on_plateau requires an eval strategy")
if not is_torch_available():
raise ValueError("lr_scheduler_type reduce_lr_on_plateau requires torch>=0.2.0")
self.optim = OptimizerNames(self.optim)
if self.adafactor:
warnings.warn(
(
"`--adafactor` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--optim"
" adafactor` instead"
),
FutureWarning,
)
self.optim = OptimizerNames.ADAFACTOR
if self.optim == OptimizerNames.ADAMW_TORCH_FUSED and is_torch_available():
if version.parse(version.parse(torch.__version__).base_version) < version.parse("2.0.0"):
raise ValueError("--optim adamw_torch_fused requires PyTorch 2.0 or higher")
# We need to setup the accelerator config here *before* the first call to `self.device`
if is_accelerate_available():
if not isinstance(self.accelerator_config, AcceleratorConfig):
if self.accelerator_config is None:
self.accelerator_config = AcceleratorConfig()
elif isinstance(self.accelerator_config, dict):
self.accelerator_config = AcceleratorConfig(**self.accelerator_config)
# Check that a user didn't pass in the class instantiator
# such as `accelerator_config = AcceleratorConfig`
elif isinstance(self.accelerator_config, type):
raise NotImplementedError(
"Tried passing in a callable to `accelerator_config`, but this is not supported. "
"Please pass in a fully constructed `AcceleratorConfig` object instead."
)
else:
self.accelerator_config = AcceleratorConfig.from_json_file(self.accelerator_config)
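# When the last incomplete batch is dropped, Accelerate's `even_batches` (which
# pads/duplicates samples so every process receives a full final batch) would
# contradict that choice, so it is turned off below.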
if self.dataloader_drop_last:
self.accelerator_config.even_batches = False
# Disable average tokens when using single device
if self.average_tokens_across_devices:
try:
if self.world_size == 1:
logger.warning(
"average_tokens_across_devices is set to True but it is invalid when world size is"
"1. Turn it to False automatically."
)
self.average_tokens_across_devices = False
except ImportError as e:
logger.warning(f"Can not specify world size due to {e}. Turn average_tokens_across_devices to False.")
self.average_tokens_across_devices = False
if (self.torch_compile_mode is not None or self.torch_compile_backend is not None) and not self.torch_compile:
assert get_habana_frameworks_version().minor > 12, "Torch compile is not available for Habana frameworks versions below 1.13"
self.torch_compile = True
assert os.getenv("PT_HPU_LAZY_MODE", "1") == "0", "Dynamo and lazy mode are mutually exclusive."
# Note: PT_HPU_LAZY_MODE=0 needs to be set before library is loaded,
# setting it here would be too late - hence assertion.
if self.torch_compile and self.torch_compile_backend is None:
self.torch_compile_backend = "hpu_backend"
# accelerate integration for torch compile
if self.torch_compile:
# set env vars for accelerate
prefix = "ACCELERATE_DYNAMO_"
os.environ[prefix + "BACKEND"] = self.torch_compile_backend
if self.torch_compile_mode is not None:
os.environ[prefix + "MODE"] = self.torch_compile_mode
if self.compile_dynamic is not None:
os.environ[prefix + "USE_DYNAMIC"] = str(self.compile_dynamic)
# if training args is specified, it will override the one specified in the accelerate config
mixed_precision_dtype = os.environ.get("ACCELERATE_MIXED_PRECISION", "no")
if self.fp8:
mixed_precision_dtype = "fp8"
elif self.bf16:
mixed_precision_dtype = "bf16"
os.environ["ACCELERATE_MIXED_PRECISION"] = mixed_precision_dtype
if self.report_to is None:
logger.info(
"The default value for the training argument `--report_to` will change in v5 (from all installed "
"integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as "
"now. You should start updating your code and make this info disappear :-)."
)
self.report_to = "all"
if self.report_to == "all" or self.report_to == ["all"]:
# Import at runtime to avoid a circular import.
from transformers.integrations import get_available_reporting_integrations
self.report_to = get_available_reporting_integrations()
if "codecarbon" in self.report_to and torch.version.hip:
logger.warning(
"When using the Trainer, CodeCarbonCallback requires the `codecarbon` package, which is not compatible with AMD ROCm (https://github.com/mlco2/codecarbon/pull/490). Automatically disabling the codecarbon callback. Reference: https://huggingface.co/docs/transformers/v4.39.3/en/main_classes/trainer#transformers.TrainingArguments.report_to."
)
self.report_to.remove("codecarbon")
elif self.report_to == "none" or self.report_to == ["none"]:
self.report_to = []
elif not isinstance(self.report_to, list):
self.report_to = [self.report_to]
if self.warmup_ratio < 0 or self.warmup_ratio > 1:
raise ValueError("warmup_ratio must lie in range [0,1]")
elif self.warmup_ratio > 0 and self.warmup_steps > 0:
logger.info(
"Both warmup_ratio and warmup_steps given, warmup_steps will override any effect of warmup_ratio"
" during training"
)
if not isinstance(self.warmup_steps, int) or self.warmup_steps < 0:
raise ValueError("warmup_steps must be of type int and must be 0 or a positive integer.")
# Copy of https://github.com/huggingface/transformers/blob/b71f20a7c9f3716d30f6738501559acf863e2c5c/src/transformers/training_args.py#L1563
# except for the following changes: (1) XLA-specific code is removed & (2) fsdp_backward_prefetch is renamed to backward_prefetch
if isinstance(self.fsdp, bool):
self.fsdp = [FSDPOption.FULL_SHARD] if self.fsdp else ""
if isinstance(self.fsdp, str):
self.fsdp = [FSDPOption(s) for s in self.fsdp.split()]
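# e.g. `--fsdp "full_shard offload"` parses to [FSDPOption.FULL_SHARD, FSDPOption.OFFLOAD].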
if self.fsdp == [FSDPOption.OFFLOAD]:
raise ValueError(
"`--fsdp offload` can't work on its own. It needs to be added to `--fsdp full_shard` or "
'`--fsdp shard_grad_op`. For example, `--fsdp "full_shard offload"`.'
)
elif FSDPOption.FULL_SHARD in self.fsdp and FSDPOption.SHARD_GRAD_OP in self.fsdp:
raise ValueError("`--fsdp full_shard` is not compatible with `--fsdp shard_grad_op`.")
if self.gradient_checkpointing and (
FSDPOption.FULL_SHARD in self.fsdp or FSDPOption.HYBRID_SHARD in self.fsdp
):
logger.warning(
"When using FSDP full shard, instead of using `gradient_checkpointing` in TrainingArguments, please"
" use `activation_checkpointing` in `fsdp_config`. The former introduces a redundant AllGather"
" operation in backward pass. Reference: https://github.com/huggingface/transformers/issues/30404"
)
if self.fsdp_config is None:
self.fsdp_config = {}
if isinstance(self.fsdp_config, str):
if len(self.fsdp) == 0:
warnings.warn("`--fsdp_config` is useful only when `--fsdp` is specified.")
with open(self.fsdp_config, encoding="utf-8") as f:
self.fsdp_config = json.load(f)
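# Normalize legacy "fsdp_"-prefixed keys below: e.g. "fsdp_min_num_params" read
# from the JSON file is rewritten to "min_num_params" so both spellings keep working.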
for k in list(self.fsdp_config.keys()):
if k.startswith("fsdp_"):
v = self.fsdp_config.pop(k)
self.fsdp_config[k[5:]] = v
if self.fsdp_min_num_params > 0:
warnings.warn("using `--fsdp_min_num_params` is deprecated. Use fsdp_config instead ", FutureWarning)
self.fsdp_config["min_num_params"] = max(self.fsdp_config.get("min_num_params", 0), self.fsdp_min_num_params)
# if fsdp_config["transformer_layer_cls_to_wrap"] is specified as a string, convert it to a list with a single object
if isinstance(self.fsdp_config.get("transformer_layer_cls_to_wrap", None), str):
self.fsdp_config["transformer_layer_cls_to_wrap"] = [self.fsdp_config["transformer_layer_cls_to_wrap"]]
if self.fsdp_transformer_layer_cls_to_wrap is not None:
warnings.warn(
"using `--fsdp_transformer_layer_cls_to_wrap` is deprecated. Use fsdp_config instead ", FutureWarning
)
self.fsdp_config["transformer_layer_cls_to_wrap"] = self.fsdp_config.get(
"transformer_layer_cls_to_wrap", []
) + [self.fsdp_transformer_layer_cls_to_wrap]
if len(self.fsdp) == 0 and self.fsdp_config["min_num_params"] > 0:
warnings.warn("`min_num_params` is useful only when `--fsdp` is specified.")
if len(self.fsdp) == 0 and self.fsdp_config.get("transformer_layer_cls_to_wrap", None) is not None:
warnings.warn("`transformer_layer_cls_to_wrap` is useful only when `--fsdp` is specified.")
if (
len(self.fsdp) > 0
and self.fsdp_config["min_num_params"] > 0
and self.fsdp_config.get("transformer_layer_cls_to_wrap", None) is not None
):
raise ValueError("`min_num_params` and `transformer_layer_cls_to_wrap` are mutually exclusive.")
self.fsdp_config["xla"] = self.fsdp_config.get("xla", False)
self.fsdp_config["xla_fsdp_v2"] = self.fsdp_config.get("xla_fsdp_v2", False)
self.fsdp_config["xla_fsdp_grad_ckpt"] = self.fsdp_config.get("xla_fsdp_grad_ckpt", False)
if self.tp_size > 1:
os.environ["ACCELERATE_USE_TP"] = "true"
os.environ["TP_SIZE"] = str(self.tp_size)
# accelerate integration for FSDP
if len(self.fsdp) > 0 and not self.fsdp_config["xla"]:
os.environ["ACCELERATE_USE_FSDP"] = "true"
from accelerate.utils.constants import (
FSDP_AUTO_WRAP_POLICY,
FSDP_SHARDING_STRATEGY,
)
prefix = "FSDP_"
for fsdp_option in self.fsdp:
if fsdp_option.upper() in FSDP_SHARDING_STRATEGY:
# set environment variable for FSDP sharding strategy
os.environ[f"{prefix}SHARDING_STRATEGY"] = str(
FSDP_SHARDING_STRATEGY.index(fsdp_option.upper()) + 1
)
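# Accelerate's FSDP_SHARDING_STRATEGY list is 0-indexed while the env var is
# 1-based, hence the `+ 1`: e.g. "full_shard" maps to "1" (assuming "FULL_SHARD"
# is the first entry in that list).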
elif fsdp_option == FSDPOption.OFFLOAD:
os.environ[f"{prefix}OFFLOAD_PARAMS"] = "true"
elif fsdp_option == FSDPOption.AUTO_WRAP:
os.environ[f"{prefix}AUTO_WRAP_POLICY"] = FSDP_AUTO_WRAP_POLICY[0]
if self.fsdp_config["min_num_params"] > 0:
os.environ[f"{prefix}MIN_NUM_PARAMS"] = str(self.fsdp_config["min_num_params"])
os.environ[f"{prefix}AUTO_WRAP_POLICY"] = FSDP_AUTO_WRAP_POLICY[1]
elif self.fsdp_config.get("transformer_layer_cls_to_wrap", None) is not None:
os.environ[f"{prefix}TRANSFORMER_CLS_TO_WRAP"] = ",".join(
self.fsdp_config["transformer_layer_cls_to_wrap"]
)
prefetch_policy = self.fsdp_config.get("backward_prefetch", "NO_PREFETCH")
os.environ[f"{prefix}BACKWARD_PREFETCH"] = prefetch_policy.upper()
os.environ[f"{prefix}FORWARD_PREFETCH"] = str(self.fsdp_config.get("forward_prefetch", "false")).lower()
sync_module_states = str(self.fsdp_config.get("sync_module_states", "true")).lower()
cpu_ram_efficient_loading = str(self.fsdp_config.get("cpu_ram_efficient_loading", "false")).lower()
if sync_module_states == "false" and cpu_ram_efficient_loading == "true":
# In this case, all processes except the main process would have random weights,
# leading to unexpected behavior during training, so we raise an error here to prevent it.
raise ValueError('`sync_module_states` must be `"True"` if `cpu_ram_efficient_loading` is `"True"`')
os.environ[f"{prefix}SYNC_MODULE_STATES"] = sync_module_states
os.environ[f"{prefix}CPU_RAM_EFFICIENT_LOADING"] = cpu_ram_efficient_loading
os.environ[f"{prefix}USE_ORIG_PARAMS"] = str(self.fsdp_config.get("use_orig_params", "true")).lower()
os.environ[f"{prefix}ACTIVATION_CHECKPOINTING"] = str(
self.fsdp_config.get("activation_checkpointing", "false")
)
if isinstance(self.debug, str):
self.debug = [DebugOption(s) for s in self.debug.split()]
elif self.debug is None:
self.debug = []
# This call to self.device is necessary to call _setup_devices so that
# torch.distributed is initialized
device_is_hpu = self.device.type == "hpu"
self.deepspeed_plugin = None
if self.deepspeed:
if not device_is_hpu:
raise ValueError("This version of DeepSpeed must be run on HPUs.")
# - must be run very last in arg parsing, since it will use a lot of these settings.
# - must be run before the model is created.
if not is_accelerate_available():
raise ValueError("--deepspeed requires Accelerate to be installed: `pip install accelerate`.")
from .integrations.deepspeed import GaudiTrainerDeepSpeedConfig
# will be used later by the Trainer
# note: leave self.deepspeed unmodified in case a user relies on it not being modified
self.hf_deepspeed_config = GaudiTrainerDeepSpeedConfig(self.deepspeed)
self.hf_deepspeed_config.trainer_config_process(self)
# Accelerate DeepSpeed Plugin
from accelerate.utils import DeepSpeedPlugin
os.environ["ACCELERATE_USE_DEEPSPEED"] = "true"
self.deepspeed_plugin = DeepSpeedPlugin(hf_ds_config=self.hf_deepspeed_config)
elif strtobool(os.environ.get("ACCELERATE_USE_DEEPSPEED", "false")):
# Accelerate DeepSpeed Plugin
from accelerate.utils import DeepSpeedPlugin
self.deepspeed_plugin = DeepSpeedPlugin()
mixed_precision = os.environ.get("ACCELERATE_MIXED_PRECISION", "no")
self.deepspeed_plugin.set_mixed_precision(mixed_precision)
self.deepspeed_plugin.set_deepspeed_weakref()
if self.use_cpu:
self.dataloader_pin_memory = False
if self.dataloader_num_workers == 0 and self.dataloader_prefetch_factor is not None:
raise ValueError(
"--dataloader_prefetch_factor can only be set when data is loaded in a different process, i.e."
" when --dataloader_num_workers > 1."
)
if self.push_to_hub_token is not None:
warnings.warn(
(
"`--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use "
"`--hub_token` instead."
),
FutureWarning,
)
self.hub_token = self.push_to_hub_token
if self.push_to_hub_model_id is not None:
self.hub_model_id = get_full_repo_name(
self.push_to_hub_model_id, organization=self.push_to_hub_organization, token=self.hub_token
)
if self.push_to_hub_organization is not None:
warnings.warn(
(
"`--push_to_hub_model_id` and `--push_to_hub_organization` are deprecated and will be removed"
" in version 5 of 🤗 Transformers. Use `--hub_model_id` instead and pass the full repo name to"
f" this argument (in this case {self.hub_model_id})."
),
FutureWarning,
)
else:
warnings.warn(
(
"`--push_to_hub_model_id` is deprecated and will be removed in version 5 of 🤗 Transformers."
" Use `--hub_model_id` instead and pass the full repo name to this argument (in this case"
f" {self.hub_model_id})."
),
FutureWarning,
)
elif self.push_to_hub_organization is not None:
self.hub_model_id = f"{self.push_to_hub_organization}/{Path(self.output_dir).name}"
warnings.warn(
(
"`--push_to_hub_organization` is deprecated and will be removed in version 5 of 🤗 Transformers."
" Use `--hub_model_id` instead and pass the full repo name to this argument (in this case"
f" {self.hub_model_id})."
),
FutureWarning,
)
if self.eval_use_gather_object and not is_accelerate_available("0.30.0"):
raise ValueError(
"--eval_use_gather_object requires Accelerate to be version of `accelerate` > 0.30.0."
"This is not supported and we recommend you to update your version."
)
if self.data_seed is not None:
if not is_accelerate_available("1.1.0"):
raise NotImplementedError(
"data_seed requires Accelerate version `accelerate` >= 1.1.0. "
"This is not supported and we recommend you to update your version."
)
if self.include_inputs_for_metrics:
logger.warning(
"Using `include_inputs_for_metrics` is deprecated and will be removed in version 5 of 🤗 Transformers. Please use `include_for_metrics` list argument instead."
)
self.include_for_metrics.append("inputs")
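# Minimal usage sketch (illustrative, not part of the original file; assumes
# optimum-habana is installed and that `GaudiTrainingArguments` is the public
# dataclass whose __post_init__ this is, with "Habana/gpt2" as an example Gaudi
# configuration):
#
#     from optimum.habana import GaudiTrainingArguments
#
#     args = GaudiTrainingArguments(
#         output_dir="trainer_output",
#         use_habana=True,
#         use_lazy_mode=True,
#         use_hpu_graphs_for_training=True,
#         gaudi_config_name="Habana/gpt2",
#         bf16=True,
#     )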