in optimum/onnxruntime/runs/__init__.py [0:0]
def __init__(self, run_config):
    super().__init__(run_config)

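    # NOTE: self.task and self.static_quantization are assumed to be set by the
    # parent Run.__init__ from run_config; both are used throughout this method.
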
    # Create the quantization configuration containing all the quantization parameters
    qconfig = QuantizationConfig(
        is_static=self.static_quantization,
        format=QuantFormat.QDQ if self.static_quantization else QuantFormat.QOperator,
        mode=QuantizationMode.QLinearOps if self.static_quantization else QuantizationMode.IntegerOps,
        activations_dtype=QuantType.QInt8 if self.static_quantization else QuantType.QUInt8,
        weights_dtype=QuantType.QInt8,
        per_channel=run_config["per_channel"],
        reduce_range=False,
        operators_to_quantize=run_config["operators_to_quantize"],
    )
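
    # For static quantization the config above selects the QDQ format, QLinearOps
    # mode and signed int8 activations whose ranges are computed by calibration
    # below; for dynamic quantization it selects QOperator/IntegerOps with unsigned
    # int8 activations whose scales are computed at runtime.
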
    onnx_model = ORT_SUPPORTED_TASKS[self.task]["class"][0].from_pretrained(
        run_config["model_name_or_path"], export=True
    )
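
    # export=True exports the transformers checkpoint to ONNX at load time; the
    # ORTModel class to instantiate is looked up in the task registry above.
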
    trfs_model = FeaturesManager.get_model_from_feature(
        onnx_model.export_feature, run_config["model_name_or_path"]
    )
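
    # The equivalent PyTorch transformers model is loaded because its config is
    # reused below, both by the task processor and to wrap the ORT session.
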
    quantizer = ORTQuantizer.from_pretrained(onnx_model)

    self.preprocessor = get_preprocessor(run_config["model_name_or_path"])

    self.batch_sizes = run_config["batch_sizes"]
    self.input_lengths = run_config["input_lengths"]

    self.time_benchmark_args = run_config["time_benchmark_args"]

    self.model_path = "model.onnx"
    self.quantized_model_path = "model_quantized.onnx"
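
    # model.onnx is the exported fp32 model; model_quantized.onnx is written by
    # quantizer.quantize() below and is the model that gets benchmarked.
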
    processing_class = task_processing_map[self.task]
    self.task_processor = processing_class(
        dataset_path=run_config["dataset"]["path"],
        dataset_name=run_config["dataset"]["name"],
        calibration_split=run_config["dataset"]["calibration_split"],
        eval_split=run_config["dataset"]["eval_split"],
        preprocessor=self.preprocessor,
        data_keys=run_config["dataset"]["data_keys"],
        ref_keys=run_config["dataset"]["ref_keys"],
        task_args=run_config["task_args"],
        static_quantization=self.static_quantization,
        num_calibration_samples=(
            run_config["calibration"]["num_calibration_samples"] if self.static_quantization else None
        ),
        config=trfs_model.config,
        max_eval_samples=run_config["max_eval_samples"],
    )
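
    # The task processor encapsulates dataset loading and preprocessing for the
    # evaluation split and, for static quantization only, the calibration split.
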
    self.metric_names = run_config["metrics"]

    self.load_datasets()

    quantization_preprocessor = QuantizationPreprocessor()
    ranges = None
    if self.static_quantization:
        calibration_dataset = self.get_calibration_dataset()
        calibrator = OnnxRuntimeCalibrator(
            calibration_dataset,
            quantizer,
            self.model_path,
            qconfig,
            calibration_params=run_config["calibration"],
            node_exclusion=run_config["node_exclusion"],
        )
        ranges, quantization_preprocessor = calibrator.fit()
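
        # fit() runs the calibration passes and returns the computed activation
        # ranges, together with a preprocessor recording the nodes to exclude
        # from quantization.
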
    # Export the quantized model
    quantizer.quantize(
        save_dir="./",
        calibration_tensors_range=ranges,
        quantization_config=qconfig,
        preprocessor=quantization_preprocessor,
    )
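
    # quantize() writes the quantized model into save_dir (here, model_quantized.onnx
    # in the working directory), applying the calibration ranges for static quantization.
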
    # ONNX Runtime benchmark
    ort_session = ORTModel.load_model(str(Path("./") / self.quantized_model_path))

    # The transformers config must be passed so that pipelines built on top of the
    # ORT model later do not complain
    self.ort_model = task_ortmodel_map[self.task](ort_session, config=trfs_model.config)

    # PyTorch benchmark
    model_class = FeaturesManager.get_model_class_for_feature(get_autoclass_name(self.task))
    self.torch_model = model_class.from_pretrained(run_config["model_name_or_path"])

    # return_body is initialized in the parent class
    self.return_body["model_type"] = self.torch_model.config.model_type
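
    # A minimal sketch of the run_config layout this constructor expects, inferred
    # from the keys accessed above; all values are hypothetical examples, and the
    # parent Run class consumes further keys (e.g. the task and the quantization
    # approach) that are not shown here:
    #
    # run_config = {
    #     "model_name_or_path": "distilbert-base-uncased-finetuned-sst-2-english",
    #     "per_channel": False,
    #     "operators_to_quantize": ["Add", "MatMul"],
    #     "node_exclusion": [],
    #     "batch_sizes": [1, 8],
    #     "input_lengths": [128],
    #     "time_benchmark_args": {...},
    #     "dataset": {
    #         "path": "glue",
    #         "name": "sst2",
    #         "calibration_split": "train",
    #         "eval_split": "validation",
    #         "data_keys": {...},
    #         "ref_keys": ["label"],
    #     },
    #     "task_args": None,
    #     "calibration": {"num_calibration_samples": 100, ...},
    #     "max_eval_samples": None,
    #     "metrics": ["accuracy"],
    # }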