in phi3/olive/phi3.py [0:0]
def get_args(raw_args):
    """Parse command-line arguments for phi3 optimization.

    Args:
        raw_args: List of argument strings (e.g. ``sys.argv[1:]``), or None
            to let argparse read from ``sys.argv``.

    Returns:
        argparse.Namespace with the parsed options.
    """
    parser = argparse.ArgumentParser(description="phi3 optimization")
    parser.add_argument(
        "--model_path",
        type=str,
        default="microsoft/Phi-3-mini-4k-instruct",
        help="Path to the model to optimize. Can be a hf model id or local path",
    )
    parser.add_argument(
        "--source",
        type=str,
        default="HF",
        choices=["HF", "AzureML"],
        help=(
            "Choose from HF(default), AzureML. If AzureML, model_path is overridden with the Phi-3-mini-4k-instruct"
            " from azureml model registry"
        ),
    )
    # NOTE: required=True makes argparse ignore any default, so none is given.
    parser.add_argument(
        "--target",
        type=str,
        required=True,
        choices=TARGETS,
        help="Choose from cpu, cuda, mobile or web",
    )
    parser.add_argument(
        "--finetune_method",
        type=str,
        default=None,
        choices=["qlora", "lora"],
        help="Finetune method before onnxruntime optimization",
    )
    # QuaRot and AWQ are alternative quantization pre-passes; allow at most one.
    quant_group = parser.add_mutually_exclusive_group()
    quant_group.add_argument(
        "--quarot",
        action="store_true",
        help="Run QuaRot on a Hugging Face PyTorch model",
    )
    quant_group.add_argument(
        "--awq",
        action="store_true",
        help="Run AWQ on the base model or the finetuned model",
    )
    parser.add_argument(
        "--precision",
        type=str,
        default="int4",
        choices=["fp32", "fp16", "int4"],
        help=(
            "Choose from fp32 or int4(default) for cpu target; "
            "fp32 or fp16 or int4(default) for gpu target; int4(default) for mobile or web"
        ),
    )
    parser.add_argument(
        "--inference",
        action="store_true",
        help="Run inference with optimized model",
    )
    parser.add_argument(
        "--prompt",
        nargs="*",
        type=str,
        default=["Write a joke"],
        help="The prompt text fed into the model. Only used with --inference",
    )
    parser.add_argument(
        "--chat_template",
        type=unescaped_str,
        default=None,
        help=(
            "The chat template for the prompt. If not provided, will use default templates for base and finetuned"
            " models. Only used with --inference"
        ),
    )
    parser.add_argument(
        "--max_length",
        type=int,
        default=200,
        help="Max length for generation. Only used with --inference",
    )
    parser.add_argument("--output_dir", type=str, default="models/phi3", help="Output path for optimized model")
    parser.add_argument(
        "--cache_dir",
        type=str,
        default="cache",
        help="Path to cache directory",
    )
    return parser.parse_args(raw_args)