in optimum/quanto/subpackage/commands/quantize.py
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from argparse import ArgumentParser

# Assumed placeholder: in the full module, SUPPORTED_LIBRARIES is defined
# elsewhere and lists the Hugging Face libraries that can load the model.
SUPPORTED_LIBRARIES = ["transformers"]


def parse_quantize_args(parser: "ArgumentParser"):
    required_group = parser.add_argument_group("Required arguments")
    required_group.add_argument(
        "output",
        type=str,
        help="The path to save the quantized model.",
    )
    required_group.add_argument(
        "-m",
        "--model",
        type=str,
        required=True,
        help="Hugging Face Hub model id or path to a local model.",
    )
    required_group.add_argument(
        "--weights",
        type=str,
        default="int8",
        choices=["int2", "int4", "int8", "float8"],
        help="The data type to quantize the model weights to.",
    )
    optional_group = parser.add_argument_group("Optional arguments")
    optional_group.add_argument(
        "--revision",
        type=str,
        default=None,
        help="The Hugging Face model revision.",
    )
    optional_group.add_argument(
        "--trust_remote_code",
        action="store_true",
        default=False,
        help="Trust remote code when loading the model.",
    )
    optional_group.add_argument(
        "--library",
        type=str,
        default=None,
        choices=SUPPORTED_LIBRARIES,
        help="The Hugging Face library to use to load the model.",
    )
    optional_group.add_argument(
        "--task",
        type=str,
        default=None,
        help="The model task (useful for models supporting multiple tasks).",
    )
    optional_group.add_argument(
        "--torch_dtype",
        type=str,
        default="auto",
        choices=["auto", "fp16", "bf16"],
        help="The torch dtype to use when loading the model weights.",
    )
    optional_group.add_argument(
        "--device",
        type=str,
        default="cpu",
        help="The device to use when loading the model.",
    )
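

# A minimal usage sketch (not part of the source file): attach the quantize
# arguments to a fresh ArgumentParser and parse a sample command line. The
# output path and model id below are illustrative placeholders.
if __name__ == "__main__":
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Quantize a Hugging Face model")
    parse_quantize_args(parser)
    args = parser.parse_args(
        ["quantized-model", "-m", "facebook/opt-125m", "--weights", "int4"]
    )
    # The positional "output" lands in args.output; every optional flag keeps
    # its default unless overridden on the command line.
    print(args.output, args.model, args.weights, args.torch_dtype, args.device)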