optimum_benchmark/backends/openvino/backend.py

from collections import OrderedDict
from tempfile import TemporaryDirectory
from typing import Any, Dict

import torch
from hydra.utils import get_class

from ...import_utils import is_accelerate_available, is_torch_distributed_available
from ..base import Backend
from ..transformers_utils import fast_weights_init
from .config import OVConfig as OVBackendConfig
from .utils import TASKS_TO_OVMODELS, TASKS_TO_OVPIPELINES

if is_accelerate_available():
    from accelerate import Accelerator

if is_torch_distributed_available():
    import torch.distributed


class OVBackend(Backend[OVBackendConfig]):
    NAME: str = "openvino"

    def __init__(self, config: OVBackendConfig) -> None:
        super().__init__(config)

        # Resolve the OVModel (or OVDiffusionPipeline) class for the requested task.
        if self.config.library != "diffusers" and self.config.task in TASKS_TO_OVMODELS:
            self.ovmodel_class = get_class(TASKS_TO_OVMODELS[self.config.task])
            self.logger.info(f"\t+ Using OVModel class {self.ovmodel_class.__name__}")
        elif self.config.library == "diffusers" and self.config.task in TASKS_TO_OVPIPELINES:
            self.ovmodel_class = get_class(TASKS_TO_OVPIPELINES[self.config.task])
            self.logger.info(f"\t+ Using OVDiffusionPipeline class {self.ovmodel_class.__name__}")
        else:
            raise NotImplementedError(f"OVBackend does not support task {self.config.task}")

    def load(self) -> None:
        self.logger.info("\t+ Creating backend temporary directory")
        self.tmpdir = TemporaryDirectory()

        if self.config.no_weights:
            self.logger.info("\t+ Creating no weights OVModel")
            self.create_no_weights_model_fast()
            self.logger.info("\t+ Loading no weights OVModel")
            self.load_ovmodel_with_no_weights()
        else:
            self.logger.info("\t+ Loading pretrained OVModel")
            self.load_ovmodel_from_pretrained()

        if self.config.reshape:
            self.logger.info("\t+ Reshaping model with static shapes")
            self.pretrained_model.reshape(**self.config.reshape_kwargs)

        if self.config.half:
            self.logger.info("\t+ Converting model to half precision")
            self.pretrained_model.half()

        # Reshaping or changing precision invalidates the compiled model, so
        # recompile explicitly here rather than lazily at first inference.
        if self.config.reshape or self.config.half:
            self.logger.info("\t+ Compiling model")
            self.pretrained_model.compile()

        self.tmpdir.cleanup()

    def load_ovmodel_from_pretrained(self) -> None:
        self.pretrained_model = self.ovmodel_class.from_pretrained(
            self.config.model,
            **self.config.model_kwargs,
            **self.ovmodel_kwargs,
        )

    def load_ovmodel_with_no_weights(self) -> None:
        # Temporarily point the config at the local randomly-initialized model
        # (with export forced on), load it, then restore the original values.
        with fast_weights_init():
            original_model, self.config.model = self.config.model, self.no_weights_model_path.as_posix()
            original_export, self.config.export = self.config.export, True
            self.load_ovmodel_from_pretrained()
            self.config.export = original_export
            self.config.model = original_model

    @property
    def ovmodel_kwargs(self) -> Dict[str, Any]:
        # Only forward kwargs that were explicitly set in the config.
        kwargs = {}

        if self.config.export is not None:
            kwargs["export"] = self.config.export
        if self.config.use_cache is not None:
            kwargs["use_cache"] = self.config.use_cache
        if self.config.use_merged is not None:
            kwargs["use_merged"] = self.config.use_merged
        if self.config.load_in_8bit is not None:
            kwargs["load_in_8bit"] = self.config.load_in_8bit
        if self.config.load_in_4bit is not None:
            kwargs["load_in_4bit"] = self.config.load_in_4bit
        if self.config.ov_config:
            kwargs["ov_config"] = self.config.ov_config

        return kwargs

    @property
    def split_between_processes(self) -> bool:
        return is_torch_distributed_available() and torch.distributed.is_initialized()

    def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        # In a distributed run, shard the inputs across processes.
        if self.split_between_processes:
            with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs:
                inputs = process_inputs

        # Drop any input key the exported model does not declare.
        for key in list(inputs.keys()):
            if hasattr(self.pretrained_model, "input_names") and key not in self.pretrained_model.input_names:
                inputs.pop(key)

        return inputs

    def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
        return self.pretrained_model.forward(**inputs, **kwargs)

    def prefill(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
        return self.pretrained_model.generate(**inputs, **kwargs)

    def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
        return self.pretrained_model.generate(**inputs, **kwargs)

    def call(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
        return self.pretrained_model(**inputs, **kwargs)
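

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module). A minimal, hedged example of
# driving OVBackend directly; optimum-benchmark normally builds backends from
# its benchmark configs rather than by hand. Only config fields this file
# actually reads (task, library, model, export) are set; the exact
# OVBackendConfig constructor signature and the checkpoint name are
# assumptions.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    config = OVBackendConfig(
        task="text-generation",  # must be a key of TASKS_TO_OVMODELS
        library="transformers",  # selects the OVModel branch in __init__
        model="hf-internal-testing/tiny-random-gpt2",  # hypothetical tiny checkpoint
        export=True,  # forwarded through the ovmodel_kwargs property
    )

    backend = OVBackend(config)
    backend.load()

    # prepare_inputs drops any key the exported model does not declare.
    inputs = backend.prepare_inputs({"input_ids": torch.randint(0, 1000, (1, 8))})
    outputs = backend.generate(inputs, {"max_new_tokens": 4})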