def register_quantization_args()

in src/optimum/nvidia/utils/cli.py [0:0]


def register_quantization_args(parser: ArgumentParser) -> ArgumentParser:
    """Register the quantization-related command-line options on *parser*.

    The following options are added:

    * ``--fp8``: boolean flag (``store_true``) enabling FP8 quantization.
    * ``--fp8-cache``: boolean flag (``store_true``) enabling an FP8 KV cache.
    * ``--num-calibration-samples``: integer, defaults to ``512``.

    Args:
        parser: The argument parser to extend. It is modified in place.

    Returns:
        The same ``parser`` instance, so calls can be chained.
    """
    parser.add_argument(
        "--fp8", action="store_true", help="Enable FP8 quantization for Ada & Hopper."
    )
    parser.add_argument(
        "--fp8-cache",
        action="store_true",
        help="Enable KV cache as fp8 for Ada & Hopper.",
    )
    parser.add_argument(
        "--num-calibration-samples",
        type=int,
        default=512,
        # Help text fixed: "samples" is countable, so "how many", not "how much".
        help="How many samples to use when calibrating.",
    )
    return parser