in src/optimum/nvidia/utils/cli.py [0:0]
def postprocess_quantization_parameters(params: Namespace) -> Namespace:
    """Derive the quantization settings from the parsed CLI arguments.

    Reads ``params.fp8`` and ``params.fp8_cache`` and stores the results back
    on ``params``:

    - ``quantization_config``: the ``QuantMode`` built from those two flags,
      with every other quantization option passed as ``False``.
    - ``has_quantization_step``: ``True`` when that mode differs from
      ``QuantMode(0)``.
    - ``calibration_output``: ``<output>/calibration``, set only when
      ``params`` carries an ``output`` value that is not ``None``.

    Args:
        params: Parsed arguments; must define ``fp8`` and ``fp8_cache``.

    Returns:
        The same ``params`` object, updated in place.
    """
    # Only support FP8 quantization for now
    qconfig = QuantMode.from_description(
        quantize_weights=False,
        quantize_activations=False,
        per_token=False,
        per_channel=False,
        per_group=False,
        use_int4_weights=False,
        use_int8_kv_cache=False,
        use_fp8_kv_cache=params.fp8_cache,
        use_fp8_qdq=params.fp8,
    )

    # QuantMode(0) is presumably the "nothing enabled" mode -- confirm against
    # the QuantMode definition.
    params.has_quantization_step = qconfig != QuantMode(0)
    params.quantization_config = qconfig

    # If we do have the output path, then let's create the calibration path.
    # An optional argument that was declared but not supplied is present on
    # the namespace with value None, which Path() rejects with a TypeError,
    # so membership alone is not a sufficient guard.
    if "output" in params and params.output is not None:
        from pathlib import Path

        params.calibration_output = Path(params.output).joinpath("calibration")

    return params