in optimum_benchmark/backends/onnxruntime/backend.py [0:0]
def load(self) -> None:
    """Load the ORTModel, optionally applying ORT optimization and/or quantization.

    Flow:
      1. Create a backend temporary directory.
      2. Load the model either with no (dummy) weights or from the pretrained
         checkpoint, depending on ``config.no_weights``.
      3. If optimization/quantization is requested, post-process the ONNX
         files and reload the model from the processed artifacts, temporarily
         redirecting ``config.model`` / ``config.export`` and restoring them
         afterwards.
      4. Validate that the requested Execution Provider is in use.
      5. Clean up the temporary directory.
    """
    self.logger.info("\t+ Creating backend temporary directory")
    self.tmpdir = TemporaryDirectory()

    if self.config.no_weights:
        self.logger.info("\t+ Creating no weights ORTModel")
        self.create_no_weights_model_fast()
        self.logger.info("\t+ Loading no weights ORTModel")
        self.load_ortmodel_with_no_weights()
    else:
        self.logger.info("\t+ Loading pretrained ORTModel")
        self.load_ortmodel_from_pretrained()

    # Post-processing operates on the files under the loaded model's
    # model_save_dir, so repoint config.model there while keeping the
    # original model id/path for restoration after the reload below.
    if self.is_optimized or self.is_quantized:
        original_model, self.config.model = self.config.model, self.pretrained_model.model_save_dir

    if self.is_optimized:
        self.logger.info("\t+ Applying ORT optimization")
        self.optimize_onnx_files()
        # Subsequent steps (quantization/reload) consume the optimized output.
        self.config.model = self.optimized_model

    if self.is_quantized:
        self.logger.info("\t+ Applying ORT quantization")
        self.quantize_onnx_files()
        self.config.model = self.quantized_model

    if self.is_optimized or self.is_quantized:
        # Reload from the processed artifacts with export disabled
        # (presumably they are already exported ONNX — the reload must not
        # re-export), then restore the caller-visible config fields.
        original_export, self.config.export = self.config.export, False
        self.logger.info("\t+ Loading optimized/quantized model")
        self.load_ortmodel_from_pretrained()
        self.config.export = original_export
        self.config.model = original_model

    self.logger.info("\t+ Validating requested Execution Provider")
    self.validate_execution_provider()

    self.logger.info("\t+ Cleaning up backend temporary directory")
    self.tmpdir.cleanup()