in optimum/exporters/executorch/tasks/seq2seq_lm.py [0:0]
from transformers import AutoModelForSeq2SeqLM

# NOTE: relative import assumed from the surrounding optimum-executorch package layout.
from ..integrations import Seq2SeqLMExportableModule


def load_seq2seq_lm_model(model_name_or_path: str, **kwargs) -> Seq2SeqLMExportableModule:
    """
    Loads a seq2seq language model for conditional text generation and registers it under the task
    'text2text-generation' using Hugging Face's `AutoModelForSeq2SeqLM`.

    Args:
        model_name_or_path (str):
            Model ID on huggingface.co or path on disk to the model repository to export. For example:
            `model_name_or_path="google-t5/t5-small"` or `model_name_or_path="/path/to/model_folder"`.
        **kwargs:
            Additional configuration options for the model:
                - dtype (str, optional):
                    Data type for model weights (default: "float32").
                    Options include "float16" and "bfloat16".
                - max_hidden_seq_length (int, optional):
                    Maximum hidden sequence length (default: 4096).
                - max_cache_length (int, optional):
                    Maximum sequence length for generation (default: 1024).

    Returns:
        Seq2SeqLMExportableModule:
            An instance of `Seq2SeqLMExportableModule` for exporting and lowering to ExecuTorch.
    """
    # Export currently targets CPU with a fixed batch size of 1.
    device = "cpu"
    batch_size = 1
    max_hidden_seq_length = kwargs.get("max_hidden_seq_length", 4096)
    max_cache_length = kwargs.get("max_cache_length", 1024)

    # Load the full encoder-decoder model in eval mode, then wrap it for ExecuTorch export.
    full_model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path).to(device).eval()
    return Seq2SeqLMExportableModule(
        full_model,
        batch_size=batch_size,
        max_hidden_seq_length=max_hidden_seq_length,
        max_cache_length=max_cache_length,
    )
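

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of how this loader might be invoked. The checkpoint ID and
# keyword arguments simply mirror the docstring defaults above; lowering the
# returned wrapper to an ExecuTorch program is handled elsewhere (by the export
# recipes) and is intentionally omitted here.
if __name__ == "__main__":
    module = load_seq2seq_lm_model(
        "google-t5/t5-small",
        max_hidden_seq_length=4096,
        max_cache_length=1024,
    )
    # The returned wrapper is what the ExecuTorch export pipeline consumes.
    print(type(module).__name__)  # -> Seq2SeqLMExportableModule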