# optimum_benchmark/backends/onnxruntime/backend.py
def quantize_onnx_files(self) -> None:
    """Quantize the exported ONNX file(s) and save the result under the temp dir.

    Builds a quantization config from ``self.config`` (either an
    ``AutoQuantizationConfig`` preset or a manual ``QuantizationConfig``),
    optionally generates a tiny calibration dataset for calibrated/static
    quantization, then runs ``ORTQuantizer`` over every ONNX component.
    The quantized model — plus the processor and model config when available —
    is written to ``self.quantized_model``.

    Raises:
        NotImplementedError: calibrated/static quantization was requested for a
            model exported as multiple ONNX components.
        ValueError: neither an auto-quantization preset nor a manual
            quantization config was provided.
    """
    self.logger.info("\t+ Attempting quantization")

    self.quantized_model = f"{self.tmpdir.name}/quantized_model"

    if self.is_calibrated and len(self.onnx_files_names) > 1:
        raise NotImplementedError(
            "Calibrated/Static Quantization is not supported for models with multiple components. "
            f"Found {len(self.onnx_files_names)} components."
        )

    self.logger.info("\t+ Processing quantization config")
    if self.config.auto_quantization is not None:
        # Resolve the named preset (e.g. "avx512") on AutoQuantizationConfig.
        auto_quantization_config = format_quantization_config(self.config.auto_quantization_config)
        auto_quantization_class = getattr(AutoQuantizationConfig, self.config.auto_quantization)
        quantization_config = auto_quantization_class(**auto_quantization_config)
    elif self.config.quantization:
        quantization_config = format_quantization_config(self.config.quantization_config)
        quantization_config = QuantizationConfig(**quantization_config)
    else:
        # Fail fast: previously this fell through and crashed later with a
        # NameError on the unbound `quantization_config`.
        raise ValueError(
            "No quantization config was provided (neither auto_quantization nor quantization is set)."
        )

    calibration_config = None
    if self.is_calibrated:
        self.logger.info("\t+ Generating calibration dataset")
        dataset_shapes = {"dataset_size": 2, "sequence_length": 2, "num_choices": 2}
        calibration_dataset = DatasetGenerator(
            task=self.config.task, dataset_shapes=dataset_shapes, model_shapes=self.model_shapes
        )()
        # Keep only the columns the exported model actually consumes.
        columns_to_be_removed = list(set(calibration_dataset.column_names) - set(self.pretrained_model.input_names))
        calibration_dataset = calibration_dataset.remove_columns(columns_to_be_removed)

        self.logger.info("\t+ Processing calibration config")
        if self.config.auto_calibration is not None:
            auto_calibration_method = getattr(AutoCalibrationConfig, self.config.auto_calibration)
            calibration_config = auto_calibration_method(calibration_dataset, **self.config.auto_calibration_config)
        elif self.config.calibration:
            calibration_config = format_calibration_config(self.config.calibration_config)
            calibration_config = CalibrationConfig(
                dataset_name="calibration_dataset",
                dataset_split=calibration_dataset.split,
                dataset_num_samples=calibration_dataset.num_rows,
                dataset_config_name=calibration_dataset.config_name,
                # Bug fix: spread the *formatted* config — the original code
                # formatted it and then discarded it in favor of the raw
                # `self.config.calibration_config`.
                **calibration_config,
            )

    for onnx_file_name in self.onnx_files_names:
        self.logger.info(f"\t+ Creating quantizer for {onnx_file_name}")
        quantizer = ORTQuantizer.from_pretrained(self.config.model, file_name=onnx_file_name)

        if self.is_calibrated:
            self.logger.info("\t+ Fitting calibration tensors range")
            calibration_tensors_range = quantizer.fit(
                dataset=calibration_dataset,
                use_gpu=(self.config.device == "cuda"),
                calibration_config=calibration_config,
                operators_to_quantize=quantization_config.operators_to_quantize,
                # TODO: add support for these (maybe)
                use_external_data_format=False,
                force_symmetric_range=False,
                batch_size=1,
            )
        else:
            calibration_tensors_range = None

        self.logger.info("\t+ Quantizing model")
        quantizer.quantize(
            save_dir=self.quantized_model,
            quantization_config=quantization_config,
            calibration_tensors_range=calibration_tensors_range,
            # TODO: add support for these (maybe)
            use_external_data_format=False,
            preprocessor=None,
            file_suffix="",
        )

    # Save the processor/config next to the quantized weights so the output
    # directory is loadable on its own.
    if self.pretrained_processor is not None:
        self.pretrained_processor.save_pretrained(self.quantized_model)
    if self.pretrained_config is not None:
        self.pretrained_config.save_pretrained(self.quantized_model)