in optimum/commands/export/executorch.py [0:0]
from pathlib import Path

# Assumed import path: `TasksManager` lives in `optimum.exporters`, reached
# here via a relative import from optimum/commands/export/executorch.py.
from ...exporters import TasksManager


def parse_args_executorch(parser):
    required_group = parser.add_argument_group("Required arguments")
    required_group.add_argument(
        "-m",
        "--model",
        type=str,
        required=True,
        help="Model ID on huggingface.co or path on disk to load the model from.",
    )
    required_group.add_argument(
        "-o",
        "--output_dir",
        type=Path,
        help="Path indicating the directory where to store the generated ExecuTorch model.",
    )
    required_group.add_argument(
        "--task",
        type=str,
        default="text-generation",
        help=(
            "The task to export the model for. Available tasks depend on the model, but are among:"
            f" {TasksManager.get_all_tasks()}."
        ),
    )
    required_group.add_argument(
        "--recipe",
        type=str,
        default="xnnpack",
        help='Pre-defined recipe to use for the ExecuTorch export. Defaults to "xnnpack".',
    )
    required_group.add_argument(
        "--use_custom_sdpa",
        action="store_true",
        help="Use custom SDPA with a static KV cache to boost performance of decoder-only models. Defaults to False.",
    )
    required_group.add_argument(
        "--use_custom_kv_cache",
        action="store_true",
        help="Use a custom KV cache for decoder-only models, updating the static cache through a custom op. Defaults to False.",
    )
    required_group.add_argument(
        "--qlinear",
        action="store_true",
        help="Quantization config for linear layers. If set, defaults to '8da4w' with group size 32.",
    )
    required_group.add_argument(
        "--qembedding",
        action="store_true",
        help="Quantization config for embedding layers. If set, defaults to int8 channelwise quantization.",
    )
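

# ---------------------------------------------------------------------------
# Usage sketch (not part of the file above): a minimal, self-contained way to
# exercise the parser. It assumes the optimum-executorch package is installed
# so the import below resolves; the parser name, model ID, and argument
# values are illustrative only.
# ---------------------------------------------------------------------------
import argparse

from optimum.commands.export.executorch import parse_args_executorch

demo_parser = argparse.ArgumentParser("executorch-export")
parse_args_executorch(demo_parser)
args = demo_parser.parse_args(
    [
        "--model", "meta-llama/Llama-3.2-1B",
        "--output_dir", "llama_et",
        "--use_custom_sdpa",
        "--qlinear",
    ]
)
print(args.model)            # meta-llama/Llama-3.2-1B
print(args.output_dir)       # llama_et  (a pathlib.Path)
print(args.task)             # text-generation  (the default)
print(args.use_custom_sdpa)  # True
print(args.qembedding)       # False  (flag not passed)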