def parse_args_onnxruntime_quantize()

in optimum/commands/onnxruntime/quantize.py

Registers the command-line arguments for the ONNX Runtime quantize command on the given ArgumentParser.


from argparse import ArgumentParser
from pathlib import Path


def parse_args_onnxruntime_quantize(parser: "ArgumentParser"):
    required_group = parser.add_argument_group("Required arguments")
    required_group.add_argument(
        "--onnx_model",
        type=Path,
        required=True,
        help="Path to the repository where the ONNX models to quantize are located.",
    )
    required_group.add_argument(
        "-o",
        "--output",
        type=Path,
        required=True,
        help="Path to the directory where to store generated ONNX model.",
    )

    optional_group = parser.add_argument_group("Optional arguments")
    optional_group.add_argument(
        "--per_channel",
        action="store_true",
        help="Compute the quantization parameters on a per-channel basis.",
    )

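    # Exactly one quantization target must be selected: an instruction-set
    # preset, TensorRT, or a custom `ORTConfig` file.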
    level_group = parser.add_mutually_exclusive_group(required=True)
    level_group.add_argument("--arm64", action="store_true", help="Quantization for the ARM64 architecture.")
    level_group.add_argument("--avx2", action="store_true", help="Quantization with AVX-2 instructions.")
    level_group.add_argument("--avx512", action="store_true", help="Quantization with AVX-512 instructions.")
    level_group.add_argument(
        "--avx512_vnni", action="store_true", help="Quantization with AVX-512 and VNNI instructions."
    )
    level_group.add_argument("--tensorrt", action="store_true", help="Quantization for NVIDIA TensorRT optimizer.")
    level_group.add_argument(
        "-c",
        "--config",
        type=Path,
        help="`ORTConfig` file to use to optimize the model.",
    )
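
For context, a minimal sketch of how this function could be wired into a standalone parser and exercised; the flag values below are illustrative, not taken from the optimum documentation:

from argparse import ArgumentParser

parser = ArgumentParser(description="Quantize ONNX models with ONNX Runtime")
parse_args_onnxruntime_quantize(parser)

# Quantize for AVX-512 + VNNI with per-channel parameters (example values).
args = parser.parse_args(
    ["--onnx_model", "model_dir", "-o", "quantized", "--per_channel", "--avx512_vnni"]
)
assert args.avx512_vnni and args.per_channel

Because the target group is declared with required=True, parse_args() exits with an error if none of --arm64/--avx2/--avx512/--avx512_vnni/--tensorrt/--config is given, and likewise if more than one is.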