in optimum_benchmark/backends/onnxruntime/backend.py [0:0]
def optimize_onnx_files(self) -> None:
    self.logger.info("\t+ Attempting optimization")
    self.optimized_model = os.path.join(self.tmpdir.name, "optimized")

    self.logger.info("\t+ Processing optimization config")
    # NOTE: callers must enable one of the two options below, otherwise
    # optimization_config is unbound when optimizer.optimize() is reached.
    if self.config.auto_optimization is not None:
        # Preset optimization level ("O1".."O4") expanded into a full config.
        optimization_config = AutoOptimizationConfig.with_optimization_level(
            optimization_level=self.config.auto_optimization,
            for_gpu=(self.config.device == "cuda"),
            **self.config.auto_optimization_config,
        )
    elif self.config.optimization:
        # Manual optimization: user-provided kwargs forwarded directly.
        optimization_config = OptimizationConfig(
            optimize_for_gpu=(self.config.device == "cuda"),
            **self.config.optimization_config,
        )

    self.logger.info("\t+ Creating optimizer")
    optimizer = ORTOptimizer.from_pretrained(self.config.model, file_names=self.onnx_files_names)

    self.logger.info("\t+ Optimizing ORTModel")
    optimizer.optimize(
        optimization_config,
        save_dir=self.optimized_model,
        # TODO: add support for these
        use_external_data_format=None,
        one_external_file=True,
        file_suffix="",
    )

    # Persist the processor and model config alongside the optimized ONNX
    # files so the optimized directory can be loaded standalone.
    if self.pretrained_processor is not None:
        self.pretrained_processor.save_pretrained(self.optimized_model)
    if self.pretrained_config is not None:
        self.pretrained_config.save_pretrained(self.optimized_model)
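
The same flow can be reproduced outside the backend class with the public optimum.onnxruntime API. A minimal sketch, assuming a local directory containing an already-exported ONNX model; the paths, file name, and the "O2" level are illustrative, not taken from the excerpt:

# Minimal sketch of the optimization flow above via optimum.onnxruntime;
# "exported_model/", "model.onnx", and "O2" are illustrative placeholders.
from optimum.onnxruntime import AutoOptimizationConfig, ORTOptimizer

# Mirrors the auto_optimization branch: a preset level ("O1".."O4")
# expanded into a full OptimizationConfig.
optimization_config = AutoOptimizationConfig.with_optimization_level(
    optimization_level="O2",
    for_gpu=False,  # set True when targeting a CUDA execution provider
)

# Load the exported ONNX file(s) and write optimized copies to save_dir.
optimizer = ORTOptimizer.from_pretrained("exported_model/", file_names=["model.onnx"])
optimizer.optimize(optimization_config, save_dir="optimized_model/")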