optimum/commands/export/executorch.py

# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Defines the command line for the export with ExecuTorch."""

from pathlib import Path
from typing import TYPE_CHECKING

from ...exporters import TasksManager
from ..base import BaseOptimumCLICommand, CommandInfo


if TYPE_CHECKING:
    from argparse import ArgumentParser


def parse_args_executorch(parser: "ArgumentParser") -> None:
    """Register the ExecuTorch export options on ``parser``.

    Every option is attached to a single argument group titled
    "Required arguments". Only ``-m/--model`` is enforced as mandatory by
    argparse; ``--task`` and ``--recipe`` carry defaults, ``-o/--output_dir``
    has neither a default nor ``required=True``, and the remaining options are
    boolean switches that are ``False`` unless given.

    Args:
        parser: The argparse parser (or sub-parser) to populate.
    """
    group = parser.add_argument_group("Required arguments")

    group.add_argument(
        "-m",
        "--model",
        type=str,
        required=True,
        help="Model ID on huggingface.co or path on disk to load model from.",
    )
    group.add_argument(
        "-o",
        "--output_dir",
        type=Path,
        help="Path indicating the directory where to store the generated ExecuTorch model.",
    )

    # The list of known tasks is rendered straight into the help text.
    available_tasks = str(TasksManager.get_all_tasks())
    group.add_argument(
        "--task",
        type=str,
        default="text-generation",
        help=(
            "The task to export the model for. Available tasks depend on the model, but are among:"
            f" {available_tasks}."
        ),
    )
    group.add_argument(
        "--recipe",
        type=str,
        default="xnnpack",
        help='Pre-defined recipes for export to ExecuTorch. Defaults to "xnnpack".',
    )

    # Boolean switches share the same argparse settings, so they are declared
    # as (flag, help text) pairs and registered in this exact order.
    switches = (
        (
            "--use_custom_sdpa",
            "For decoder-only models to use custom sdpa with static kv cache to boost performance. Defaults to False.",
        ),
        (
            "--use_custom_kv_cache",
            "For decoder-only models to use custom kv cache for static cache that updates cache using custom op. Defaults to False.",
        ),
        (
            "--qlinear",
            "Quantization config for linear layers. If set, defaults to '8da4w' w/ groupsize 32.",
        ),
        (
            "--qembedding",
            "Quantization config for embedding. If set, defaults to int8 channelwise.",
        ),
    )
    for flag, description in switches:
        group.add_argument(flag, required=False, action="store_true", help=description)


class ExecuTorchExportCommand(BaseOptimumCLICommand):
    """CLI command, registered under the name ``executorch``, that exports a model to ExecuTorch."""

    COMMAND = CommandInfo(name="executorch", help="Export models to ExecuTorch.")

    @staticmethod
    def parse_args(parser: "ArgumentParser"):
        """Populate ``parser`` with the ExecuTorch export options."""
        return parse_args_executorch(parser)

    def run(self):
        """Call ``main_export`` with the values parsed from the command line.

        ``model``, ``task``, ``recipe`` and ``output_dir`` are always passed.
        The boolean switches are forwarded as keyword arguments only when they
        were set, so ``main_export`` never receives an explicit ``False`` for
        them.
        """
        # Imported at call time rather than module import time (presumably to
        # keep CLI start-up light -- confirm before moving it to the top).
        from ...exporters.executorch import main_export

        switch_names = ("use_custom_sdpa", "use_custom_kv_cache", "qlinear", "qembedding")
        kwargs = {name: getattr(self.args, name) for name in switch_names if getattr(self.args, name)}

        main_export(
            model_name_or_path=self.args.model,
            task=self.args.task,
            recipe=self.args.recipe,
            output_dir=self.args.output_dir,
            **kwargs,
        )

optimum/commands/export/executorch.py (83 lines of code) (raw):