# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import functools
import gc
import inspect
import logging
import os
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
from transformers.generation import GenerationMixin
from transformers.models.speecht5.modeling_speecht5 import SpeechT5HifiGan
from transformers.utils import is_tf_available, is_torch_available
from openvino import Model, save_model
from openvino.exceptions import OVTypeError
from openvino.tools.ovc import convert_model
from optimum.exporters import TasksManager
from optimum.exporters.utils import (
DECODER_NAME,
ENCODER_NAME,
_get_submodels_for_export_encoder_decoder,
get_diffusion_models_for_export,
)
from optimum.exporters.utils import (
_get_submodels_and_export_configs as _default_get_submodels_and_export_configs,
)
from optimum.intel.utils.import_utils import (
_diffusers_version,
_nncf_version,
_open_clip_version,
_optimum_intel_version,
_optimum_version,
_timm_version,
_torch_version,
_transformers_version,
compare_versions,
is_openvino_tokenizers_version,
is_tokenizers_version,
is_transformers_version,
)
from optimum.utils import DEFAULT_DUMMY_SHAPES, is_diffusers_available
from ...intel.utils.import_utils import is_nncf_available
from ...intel.utils.modeling_utils import _infer_library_from_model_or_model_class
from .model_patcher import patch_model_with_bettertransformer
from .stateful import (
ensure_export_task_support_stateful,
ensure_model_type_support_stateful,
ensure_stateful_is_available,
patch_stateful,
)
from .utils import (
MULTI_MODAL_TEXT_GENERATION_MODELS,
OV_XML_FILE_NAME,
_get_input_info,
_get_open_clip_submodels_fn_and_export_configs,
allow_skip_tracing_check,
clear_class_registry,
remove_none_from_dummy_inputs,
save_config,
save_preprocessors,
set_simplified_chat_template,
)
logger = logging.getLogger(__name__)
if is_torch_available():
import torch.nn as nn
from transformers.modeling_utils import PreTrainedModel
if is_diffusers_available():
from diffusers import DiffusionPipeline, ModelMixin
if is_tf_available():
from transformers.modeling_tf_utils import TFPreTrainedModel
if TYPE_CHECKING:
from optimum.exporters.onnx.base import OnnxConfig
from optimum.intel.openvino.configuration import OVConfig
def _set_runtime_options(
models_and_export_configs: Dict[
str,
Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin", "DiffusionPipeline"], "OnnxConfig"],
],
task: str,
library_name: str,
quantized_model: bool,
):
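    """
    Attach default OpenVINO runtime options to each submodel's export config.
    For text-generation tasks, the language model of image-text-to-text pipelines and stateful submodels,
    `ACTIVATIONS_SCALE_FACTOR` is set to `8.0`; `KV_CACHE_PRECISION` is additionally set to `f16` unless the
    model is quantized.
    """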
for model_name in models_and_export_configs.keys():
_, sub_export_config = models_and_export_configs[model_name]
if not hasattr(sub_export_config, "runtime_options"):
sub_export_config.runtime_options = {}
if (
"text-generation" in task
or ("image-text-to-text" in task and model_name == "language_model")
or getattr(sub_export_config, "stateful", False)
):
sub_export_config.runtime_options["ACTIVATIONS_SCALE_FACTOR"] = "8.0"
if not quantized_model and (
"text-generation" in task
or ("image-text-to-text" in task and model_name == "language_model")
or getattr(sub_export_config, "stateful", False)
):
sub_export_config.runtime_options["KV_CACHE_PRECISION"] = "f16"
def _save_model(
model,
path: str,
ov_config: Optional["OVConfig"] = None,
library_name: Optional[str] = None,
config: "OnnxConfig" = None,
):
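    """
    Serialize an OpenVINO model to disk, adding dependency versions and runtime options to its rt_info.
    Weights are compressed to FP16 when the provided `ov_config` requests the `fp16` dtype.
    """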
compress_to_fp16 = ov_config is not None and ov_config.dtype == "fp16"
model = _add_version_info_to_model(model, library_name)
runtime_options = config.runtime_options if hasattr(config, "runtime_options") else {}
model = _add_runtime_options_to_rt_info(model, runtime_options)
save_model(model, path, compress_to_fp16)
del model
gc.collect()
def export(
model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin", "DiffusionPipeline"],
config: "OnnxConfig",
output: Path,
opset: Optional[int] = None,
device: str = "cpu",
input_shapes: Optional[Dict] = None,
model_kwargs: Optional[Dict[str, Any]] = None,
ov_config: Optional["OVConfig"] = None,
stateful: bool = True,
patch_16bit_model: bool = False,
library_name: Optional[str] = None,
) -> Tuple[List[str], List[str]]:
"""
    Exports a PyTorch or TensorFlow model to an OpenVINO Intermediate Representation.
Args:
model ([`PreTrainedModel`] or [`TFPreTrainedModel`]):
The model to export.
config ([`~exporters.onnx.config.OnnxConfig`]):
The ONNX configuration associated with the exported model.
output (`Path`):
Directory to store the exported model.
opset (`Optional[int]`, defaults to `None`):
The version of the ONNX operator set to use.
device (`str`, *optional*, defaults to `cpu`):
The device on which the model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for
export on CUDA devices.
ov_config (`OVConfig`, *optional*):
The configuration containing the parameters related to quantization.
        input_shapes (`Optional[Dict]`, defaults to `None`):
            If specified, allows the use of specific shapes for the example inputs provided to the exporter.
        stateful (`bool`, defaults to `True`):
            Produce a stateful model where all KV-cache inputs and outputs are hidden inside the model and are not exposed as model inputs and outputs. Applicable only for decoder models.
    Returns:
        `Tuple[List[str], List[str]]`: A tuple with the ordered list of the model's input names and the list of
        its output names from the ONNX configuration.
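    Example:
        A minimal usage sketch (the checkpoint and task below are illustrative; `stateful` is disabled here since
        it only applies to decoder models):
        ```python
        from pathlib import Path
        from transformers import AutoModelForSequenceClassification
        from optimum.exporters import TasksManager

        model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
        config_constructor = TasksManager.get_exporter_config_constructor(
            model=model, exporter="openvino", task="text-classification", library_name="transformers"
        )
        export_config = config_constructor(model.config)
        export(model, export_config, output=Path("openvino_model.xml"), stateful=False)
        ```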
"""
if not (is_torch_available() or is_tf_available()):
raise ImportError(
"Cannot convert because neither PyTorch nor TensorFlow are installed. "
"Please install torch or tensorflow first."
)
if "diffusers" in str(model.__class__) and not is_diffusers_available():
raise ImportError("The package `diffusers` is required to export diffusion models to OpenVINO.")
if stateful:
        # This will be checked anyway after the model conversion, but checking it earlier saves time for the user if an unsuitable version is used
stateful = ensure_stateful_is_available()
if is_torch_available() and isinstance(model, nn.Module):
return export_pytorch(
model,
config,
opset,
output,
device=device,
input_shapes=input_shapes,
ov_config=ov_config,
model_kwargs=model_kwargs,
stateful=stateful,
patch_16bit_model=patch_16bit_model,
library_name=library_name,
)
elif is_tf_available() and issubclass(type(model), TFPreTrainedModel):
output.parent.mkdir(parents=True, exist_ok=True)
if opset is None:
opset = config.DEFAULT_ONNX_OPSET
if device == "cuda":
raise RuntimeError("`tf2onnx` does not support export on CUDA device.")
if input_shapes is not None:
logger.info("`input_shapes` argument is not supported by the Tensorflow ONNX export and will be ignored.")
return export_tensorflow(model, config, opset, output, ov_config=ov_config, library_name=library_name)
else:
raise RuntimeError(
"You either provided a PyTorch model with only TensorFlow installed, or a TensorFlow model with only PyTorch installed."
)
def export_tensorflow(
model: Union["PreTrainedModel", "ModelMixin"],
config: "OnnxConfig",
opset: int,
output: Path,
ov_config: Optional["OVConfig"] = None,
library_name: Optional[str] = None,
):
"""
Export the TensorFlow model to OpenVINO format.
Args:
        model (`Union[PreTrainedModel, ModelMixin]`): The model to export.
config (OnnxConfig): The configuration of the model.
opset (int): The ONNX opset version to use.
output (Path): The path to save the model.
    Returns:
        input_names: list of input names from the ONNX configuration.
        output_names: list of output names from the ONNX configuration.
        bool: `True` if the model was exported successfully.
"""
from optimum.exporters.onnx.convert import export_tensorflow as export_tensorflow_onnx
onnx_path = Path(output).with_suffix(".onnx")
input_names, output_names = export_tensorflow_onnx(model, config, opset, onnx_path)
ov_model = convert_model(str(onnx_path))
library_name = _infer_library_from_model_or_model_class(model=model, library_name=library_name)
_save_model(
ov_model,
output.parent / output,
ov_config=ov_config,
library_name=library_name,
config=config,
)
del ov_model
gc.collect()
return input_names, output_names, True
def export_pytorch_via_onnx(
model: Union["PreTrainedModel", "ModelMixin"],
config: "OnnxConfig",
opset: int,
output: Path,
device: str = "cpu",
input_shapes: Optional[Dict] = None,
model_kwargs: Optional[Dict[str, Any]] = None,
ov_config: Optional["OVConfig"] = None,
library_name: Optional[str] = None,
):
"""
Exports a PyTorch model to an OpenVINO Intermediate Representation via ONNX export.
Args:
model ([`PreTrainedModel`]):
The model to export.
config ([`~exporters.onnx.config.OnnxConfig`]):
The configuration associated with the exported model.
opset (`int`):
The version of the ONNX operator set to use.
output (`Path`):
Directory to store the exported model.
device (`str`, defaults to `"cpu"`):
The device on which the model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for
export on CUDA devices.
        input_shapes (`Optional[Dict]`, defaults to `None`):
            If specified, allows the use of specific shapes for the example inputs provided to the exporter.
        model_kwargs (`Optional[Dict[str, Any]]`, defaults to `None`):
            Additional kwargs for model export.
ov_config (`OVConfig`, *optional*):
The configuration containing the parameters related to quantization.
    Returns:
        `Tuple[List[str], List[str], bool]`: A tuple with the ordered list of the model's input names, the list of
        its output names from the ONNX configuration, and a boolean flag indicating whether the legacy ONNX export
        path was applied to the model.
import torch
from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx
output = Path(output)
orig_torch_onnx_export = torch.onnx.export
torch.onnx.export = functools.partial(orig_torch_onnx_export, do_constant_folding=False)
model.config.torchscript = False
model.config.return_dict = True
onnx_output = output.with_suffix(".onnx")
input_names, output_names = export_pytorch_to_onnx(
model, config, opset, onnx_output, device, input_shapes, model_kwargs
)
torch.onnx.export = orig_torch_onnx_export
ov_model = convert_model(str(onnx_output))
library_name = _infer_library_from_model_or_model_class(model=model, library_name=library_name)
_save_model(
ov_model,
output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output,
ov_config=ov_config,
library_name=library_name,
config=config,
)
del ov_model
gc.collect()
return input_names, output_names, True
def export_pytorch(
model: Union["PreTrainedModel", "ModelMixin"],
config: "OnnxConfig",
opset: int,
output: Path,
device: str = "cpu",
input_shapes: Optional[Dict] = None,
model_kwargs: Optional[Dict[str, Any]] = None,
ov_config: Optional["OVConfig"] = None,
stateful: bool = False,
patch_16bit_model: bool = False,
library_name: Optional[str] = None,
) -> Tuple[List[str], List[str]]:
"""
Exports a PyTorch model to an OpenVINO Intermediate Representation.
Args:
model ([`PreTrainedModel`]):
The model to export.
config ([`~exporters.onnx.config.OnnxConfig`]):
The configuration associated with the exported model.
opset (`int`):
The version of the ONNX operator set to use.
output (`Path`):
Directory to store the exported model.
device (`str`, defaults to `"cpu"`):
The device on which the model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for
export on CUDA devices.
        input_shapes (`Optional[Dict]`, defaults to `None`):
            If specified, allows the use of specific shapes for the example inputs provided to the exporter.
        model_kwargs (`Optional[Dict[str, Any]]`, defaults to `None`):
            Additional kwargs for model export.
        ov_config (`OVConfig`, *optional*):
            The configuration containing the parameters related to quantization.
        stateful (`bool`, defaults to `False`):
            Produce a stateful model where all KV-cache inputs and outputs are hidden inside the model and are not exposed as model inputs and outputs. Applicable only for decoder models.
    Returns:
        `Tuple[List[str], List[str], bool]`: A tuple with the ordered list of the model's input names, the list of
        its output names from the ONNX configuration, and a boolean flag indicating whether the legacy ONNX export
        path was applied to the model.
"""
import torch
from torch.utils._pytree import tree_map
from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder
from optimum.exporters.utils import check_dummy_inputs_are_allowed
logger.info(f"Using framework PyTorch: {torch.__version__}")
output = Path(output)
if stateful:
        # Trigger BetterTransformer together with the stateful model because OpenVINO HW-dependent transformations
        # expect both of them to be applied to achieve the best performance.
        # TODO: Consider applying bettertransformer regardless of the stateful flag -- requires additional validation.
        model = patch_model_with_bettertransformer(model)
        # TODO: Consider unpatching the model at the end of this function once the export is done.
        # For now it is left as-is because the model is not expected to be used after calling export_pytorch, and
        # this function is one of the _internal_ steps in a bigger model conversion pipeline.
with torch.no_grad():
if hasattr(model, "config"):
model.config.torchscript = False
model.config.return_dict = True
model.eval()
        # Check if we need to override certain configuration items
if config.values_override is not None:
logger.info(f"Overriding {len(config.values_override)} configuration item(s)")
for override_config_key, override_config_value in config.values_override.items():
logger.info(f"\t- {override_config_key} -> {override_config_value}")
setattr(model.config, override_config_key, override_config_value)
if input_shapes is None:
input_shapes = {} # will use the defaults from DEFAULT_DUMMY_SHAPES
# Check that inputs match, and order them properly
dummy_inputs = config.generate_dummy_inputs(framework="pt", **input_shapes)
device = torch.device(device)
if device.type == "cuda" and torch.cuda.is_available():
model.to(device)
dummy_inputs = tree_map(
lambda value: value.to(device) if isinstance(value, torch.Tensor) else value, dummy_inputs
)
dummy_inputs = config.rename_ambiguous_inputs(dummy_inputs)
dummy_inputs, dict_inputs = remove_none_from_dummy_inputs(dummy_inputs)
        # TorchScript is used behind the OpenVINO conversion. Optimum supports patching only for return_dict=True models,
        # while TorchScript does not support dictionaries with values of mixed types (e.g. Tensor and None) in model inputs/outputs.
        # To handle this, an additional wrapper is applied to the patcher's forward.
        # model.config.torchscript = True cannot be used for patching because it overrides return_dict to False.
patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs)
patched_forward = patcher.patched_forward
dummy_input_keys = list(dummy_inputs.keys())
@functools.wraps(patched_forward)
def ts_patched_forward(*args, **kwargs):
ordered_example_inputs = [
param
for param in inspect.signature(
patcher.orig_forward if library_name != "sentence_transformers" else patcher.patched_forward
).parameters
if param in dummy_input_keys
]
kwargs.update(zip(ordered_example_inputs, args))
for i in range(len(dict_inputs)):
input_name, keys = dict_inputs[i]
tuple_input = kwargs[input_name]
input_dict = dict(zip(keys, tuple_input))
kwargs[input_name] = input_dict
outputs = patched_forward(**kwargs)
return tuple([value if not isinstance(value, list) else tuple(value) for value in outputs.values()])
patcher.patched_forward = ts_patched_forward
ts_decoder_kwargs = {}
model_config = getattr(model, "config", {})
model_type = getattr(model_config, "model_type", "").replace("_", "-")
if allow_skip_tracing_check(library_name, model_type):
ts_decoder_kwargs["trace_kwargs"] = {"check_trace": False}
with patcher:
if patch_16bit_model:
from openvino.frontend.pytorch.patch_model import __make_16bit_traceable
__make_16bit_traceable(model)
check_dummy_inputs_are_allowed(model, dummy_inputs)
input_info = _get_input_info(model, config, dummy_inputs)
ts_decoder = TorchScriptPythonDecoder(model, example_input=dummy_inputs, **ts_decoder_kwargs)
ov_model = convert_model(
ts_decoder,
example_input=dummy_inputs,
input=[(item.shape, item.type) for item in input_info],
)
ov_model.validate_nodes_and_infer_types() # TODO: remove as unnecessary validation?
output_names = list(config.outputs.keys())
for idx, out_tensor in enumerate(ov_model.outputs):
if idx < len(output_names):
out_tensor.get_tensor().set_names({output_names[idx]})
input_names = [item.name for item in input_info]
for idx, inp_tensor in enumerate(ov_model.inputs):
input_name = input_names[idx]
inp_tensor.get_tensor().set_names({input_name})
if stateful:
patch_stateful(model.config, ov_model)
library_name = _infer_library_from_model_or_model_class(model=model, library_name=library_name)
_save_model(
ov_model,
output,
ov_config=ov_config,
library_name=library_name,
config=config,
)
clear_class_registry()
del ov_model
del model
gc.collect()
return input_names, output_names, False
def export_models(
models_and_export_configs: Dict[
str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin", "DiffusionPipeline"], "OnnxConfig"]
],
output_dir: Path,
opset: Optional[int] = None,
output_names: Optional[List[str]] = None,
device: str = "cpu",
input_shapes: Optional[Dict] = None,
model_kwargs: Optional[Dict[str, Any]] = None,
ov_config: Optional["OVConfig"] = None,
stateful: bool = True,
patch_16bit_model: bool = False,
library_name: Optional[str] = None,
) -> Tuple[List[List[str]], List[List[str]]]:
"""
    Export the models to the OpenVINO IR format.
    Args:
        models_and_export_configs (Dict[str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin", "DiffusionPipeline"], "OnnxConfig"]]):
            A dictionary mapping each submodel name to a tuple of the submodel and its export configuration.
        output_dir (Path): Output directory for saving the exported models.
        opset (Optional[int], defaults to None): ONNX export opset.
        output_names (Optional[List[str]], defaults to None): Output file names for the exported models.
        device (str, defaults to "cpu"):
            The device on which the model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for
            export on CUDA devices.
        input_shapes (Optional[Dict], defaults to None):
            If specified, allows the use of specific shapes for the example inputs provided to the exporter.
        ov_config (`OVConfig`, *optional*):
            The configuration containing the parameters related to quantization.
        model_kwargs (Optional[Dict[str, Any]], *optional*):
            Additional kwargs for model export.
        stateful (`bool`, defaults to `True`):
            Produce stateful models where all KV-cache inputs and outputs are hidden inside the model and are not exposed as model inputs and outputs. Applicable only for decoder models.
    Raises:
        ValueError: If the number of provided output names does not match the number of models to export.
    Returns:
        A list of input names and a list of output names from the ONNX configurations.
"""
outputs = []
if output_names is not None and len(output_names) != len(models_and_export_configs):
raise ValueError(
f"Provided custom names {output_names} for the export of {len(models_and_export_configs)} models. Please provide the same number of names as models to export."
)
for i, model_name in enumerate(models_and_export_configs.keys()):
submodel, sub_export_config = models_and_export_configs[model_name]
output_name = output_names[i] if output_names is not None else Path(model_name + ".xml")
output_path = output_dir / output_name
output_path.parent.mkdir(parents=True, exist_ok=True)
outputs.append(
export(
model=submodel,
config=sub_export_config,
output=output_path,
opset=opset,
device=device,
input_shapes=input_shapes,
model_kwargs=model_kwargs,
ov_config=ov_config,
stateful=stateful[i] if isinstance(stateful, (list, tuple)) else stateful,
patch_16bit_model=patch_16bit_model,
library_name=library_name,
)
)
outputs = list(map(list, zip(*outputs)))
return outputs
def export_from_model(
model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin", "DiffusionPipeline"],
output: Union[str, Path],
task: Optional[str] = None,
ov_config: Optional["OVConfig"] = None,
stateful: bool = True,
opset: Optional[int] = None,
model_kwargs: Optional[Dict[str, Any]] = None,
custom_export_configs: Optional[Dict[str, "OnnxConfig"]] = None,
fn_get_submodels: Optional[Callable] = None,
preprocessors: List = None,
device: str = "cpu",
trust_remote_code: bool = False,
patch_16bit_model: bool = False,
**kwargs_shapes,
):
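    """
    Export an already instantiated model (e.g. a transformers or diffusers model) and all of its submodels
    to the OpenVINO IR format.
    The task is inferred from the model when not explicitly provided, the relevant submodels and their export
    configs are collected, default runtime options are attached, and the exported `openvino_*.xml` files are
    saved under `output` together with the relevant configuration files and preprocessors.
    Returns:
        `List[str]`: The paths of the exported OpenVINO submodels, relative to `output`.
    """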
model_kwargs = model_kwargs or {}
if ov_config is not None and ov_config.quantization_config and not is_nncf_available():
raise ImportError(
f"Compression of the weights to {ov_config.quantization_config} requires nncf, please install it with `pip install nncf`"
)
library_name = _infer_library_from_model_or_model_class(model)
if library_name != "open_clip":
TasksManager.standardize_model_attributes(model)
if hasattr(model.config, "export_model_type") and model.config.export_model_type is not None:
model_type = model.config.export_model_type.replace("_", "-")
else:
model_type = (getattr(model.config, "model_type", None) or "").replace("_", "-")
custom_architecture = library_name == "transformers" and model_type not in TasksManager._SUPPORTED_MODEL_TYPE
if task is not None and task != "auto":
task = TasksManager.map_from_synonym(task)
else:
try:
task = TasksManager._infer_task_from_model_or_model_class(model=model)
except (ValueError, KeyError) as e:
raise RuntimeError(
f"The model task could not be automatically inferred in `export_from_model`. Please provide the argument `task` with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
)
if (
not custom_architecture
and library_name != "diffusers"
and task + "-with-past"
in TasksManager.get_supported_tasks_for_model_type(model_type, "openvino", library_name=library_name)
):
# -with-past is the default.
task = task + "-with-past"
logger.info(f"Automatic task detection to: {task}.")
is_encoder_decoder = getattr(getattr(model, "config", {}), "is_encoder_decoder", False)
stateful = stateful and (
ensure_export_task_support_stateful(task) or ensure_model_type_support_stateful(model_type)
)
if stateful and is_encoder_decoder and not getattr(model, "_supports_cache_class", False):
stateful = False
# TODO: support onnx_config.py in the model repo
if custom_architecture and custom_export_configs is None:
raise ValueError(
f"Trying to export a {model_type} model, that is a custom or unsupported architecture, but no custom export configuration was passed as `custom_export_configs`. Please refer to https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#custom-export-of-transformers-models for an example on how to export custom models. Please open an issue at https://github.com/huggingface/optimum-intel/issues if you would like the model type {model_type} to be supported natively in the OpenVINO export."
)
if task.startswith("text-generation") and model.config.is_encoder_decoder:
raise ValueError(
f"model.config.is_encoder_decoder is True and task is `{task}`, which are incompatible. If the task was auto-inferred, please fill a bug report"
f"at https://github.com/huggingface/optimum, if --task was explicitely passed, make sure you selected the right task for the model,"
f" referring to `optimum.exporters.tasks.TaskManager`'s `_TRANSFORMERS_TASKS_TO_MODEL_LOADERS`."
)
if library_name != "diffusers" and model_type in TasksManager._UNSUPPORTED_CLI_MODEL_TYPE:
raise ValueError(
f"{model_type} is not supported yet. Only {list(TasksManager._SUPPORTED_CLI_MODEL_TYPE.keys())} are supported. "
f"If you want to support {model_type} please propose a PR or open up an issue."
)
output = Path(output)
if not output.exists():
output.mkdir(parents=True)
# Get the shapes to be used to generate dummy inputs
input_shapes = {}
for input_name in DEFAULT_DUMMY_SHAPES.keys():
if input_name in ["height", "width"]:
# use H and W from generator defaults
continue
input_shapes[input_name] = (
kwargs_shapes[input_name] if input_name in kwargs_shapes else DEFAULT_DUMMY_SHAPES[input_name]
)
if library_name == "open_clip":
custom_architecture = True
custom_export_configs, fn_get_submodels = _get_open_clip_submodels_fn_and_export_configs(
model, library_name, task, preprocessors, custom_export_configs, fn_get_submodels
)
if library_name == "diffusers":
export_config, models_and_export_configs = get_diffusion_models_for_export_ext(model, exporter="openvino")
stateful_submodels = False
elif stateful and is_encoder_decoder and not custom_architecture:
export_config, models_and_export_configs = _get_encoder_decoder_stateful_models_for_export(
model=model, task=task, preprocessors=preprocessors, library_name=library_name, _variant="default"
)
stateful_submodels = [False, True]
else:
logging.disable(logging.INFO)
export_config, models_and_export_configs, stateful_submodels = _get_submodels_and_export_configs(
model=model,
task=task,
monolith=False,
custom_export_configs=custom_export_configs if custom_export_configs is not None else {},
custom_architecture=custom_architecture,
fn_get_submodels=fn_get_submodels,
preprocessors=preprocessors,
library_name=library_name,
model_kwargs=model_kwargs,
_variant="default",
legacy=False,
exporter="openvino",
stateful=stateful,
)
logging.disable(logging.NOTSET)
if library_name == "open_clip":
if hasattr(model.config, "save_pretrained"):
model.config.save_pretrained(output)
for preprocess in preprocessors:
if hasattr(preprocess, "save_pretrained"):
preprocess.save_pretrained(output)
files_subpaths = ["openvino_" + model_name + ".xml" for model_name in models_and_export_configs.keys()]
elif library_name != "diffusers":
if is_transformers_version(">=", "4.44.99"):
# some model configs may have issues with loading without parameters initialization
try:
misplaced_generation_parameters = model.config._get_non_default_generation_parameters()
except (KeyError, TypeError):
misplaced_generation_parameters = {}
if isinstance(model, GenerationMixin) and len(misplaced_generation_parameters) > 0:
logger.warning(
"Moving the following attributes in the config to the generation config: "
f"{misplaced_generation_parameters}. You are seeing this warning because you've set "
"generation parameters in the model config, as opposed to in the generation config.",
)
for param_name, param_value in misplaced_generation_parameters.items():
setattr(model.generation_config, param_name, param_value)
setattr(model.config, param_name, None)
# Saving the model config and preprocessor as this is needed sometimes.
save_config(model.config, output)
generation_config = getattr(model, "generation_config", None)
if generation_config is not None:
try:
generation_config.save_pretrained(output)
except Exception as exception:
logger.warning(
f"The generation config will not be saved, saving failed with following error:\n{exception}"
)
save_preprocessors(preprocessors, model.config, output, trust_remote_code)
files_subpaths = ["openvino_" + model_name + ".xml" for model_name in models_and_export_configs.keys()]
else:
# save the subcomponent configuration
for model_name in models_and_export_configs:
subcomponent = models_and_export_configs[model_name][0]
if hasattr(subcomponent, "save_config"):
subcomponent.save_config(output / model_name)
elif hasattr(subcomponent, "config") and hasattr(subcomponent.config, "save_pretrained"):
subcomponent.config.save_pretrained(output / model_name)
files_subpaths = [os.path.join(name_dir, OV_XML_FILE_NAME) for name_dir in models_and_export_configs]
# Saving the additional components needed to perform inference.
model.scheduler.save_pretrained(output.joinpath("scheduler"))
feature_extractor = getattr(model, "feature_extractor", None)
if feature_extractor is not None:
feature_extractor.save_pretrained(output.joinpath("feature_extractor"))
tokenizer = getattr(model, "tokenizer", None)
if tokenizer is not None:
tokenizer.save_pretrained(output.joinpath("tokenizer"))
tokenizer_2 = getattr(model, "tokenizer_2", None)
if tokenizer_2 is not None:
tokenizer_2.save_pretrained(output.joinpath("tokenizer_2"))
tokenizer_3 = getattr(model, "tokenizer_3", None)
if tokenizer_3 is not None:
tokenizer_3.save_pretrained(output.joinpath("tokenizer_3"))
safety_checker = getattr(model, "safety_checker", None)
if safety_checker is not None:
safety_checker.save_pretrained(output.joinpath("safety_checker"))
model.save_config(output)
_set_runtime_options(
models_and_export_configs,
task,
library_name,
hasattr(ov_config, "quantization_config") and ov_config.quantization_config,
)
export_models(
models_and_export_configs=models_and_export_configs,
output_dir=output,
output_names=files_subpaths,
input_shapes=input_shapes,
device=device,
ov_config=ov_config,
stateful=stateful_submodels,
opset=opset,
model_kwargs=model_kwargs,
patch_16bit_model=patch_16bit_model,
library_name=library_name,
)
return files_subpaths
def export_tokenizer(
tokenizer,
output: Union[str, Path],
suffix: Optional[str] = "",
task: Optional[str] = None,
processor_chat_template: Optional[str] = None,
):
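    """
    Convert a Hugging Face tokenizer into OpenVINO tokenizer (and, when supported, detokenizer) models using
    `openvino_tokenizers` and save them under `output`. The export is skipped silently when
    `openvino_tokenizers` is not installed or the tokenizer type is not supported.
    """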
# avoid circular imports
from optimum.intel.openvino import OV_DETOKENIZER_NAME, OV_TOKENIZER_NAME
from optimum.intel.openvino.utils import maybe_convert_tokenizer_to_fast
try:
from openvino_tokenizers import convert_tokenizer
except ModuleNotFoundError:
return
if is_tokenizers_version(">", "0.19") and is_openvino_tokenizers_version("<", "2024.5.0.0"):
logger.warning(
"Exporting tokenizers to OpenVINO is not supported for tokenizers version > 0.19 and openvino version <= 2024.4. "
"Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO."
)
if not isinstance(output, Path):
output = Path(output)
if output.exists():
tokenizer = maybe_convert_tokenizer_to_fast(tokenizer, output)
if (
task is not None
and (task.startswith("text-generation") or task == "image-text-to-text")
and compare_versions("openvino-tokenizers", ">=", "2024.3.0.0")
):
logger.info(f"Set tokenizer padding side to left for `{task}` task.")
tokenizer.padding_side = "left"
tokenizer.truncation_side = "left"
try:
converted = convert_tokenizer(tokenizer, with_detokenizer=True)
set_simplified_chat_template(converted[0], processor_chat_template)
except NotImplementedError:
logger.info("Detokenizer is not supported, convert tokenizer only.")
converted = convert_tokenizer(tokenizer, with_detokenizer=False)
except OVTypeError:
logger.debug(f"OpenVINO Tokenizer export for {type(tokenizer).__name__} is not supported.")
return
except Exception as exception:
logger.debug(
f"OpenVINO Tokenizer export for {type(tokenizer).__name__} is not supported. Exception: {exception}"
)
return
if not isinstance(converted, tuple):
converted = (converted,)
for model, file_name in zip(converted, (OV_TOKENIZER_NAME, OV_DETOKENIZER_NAME)):
save_model(model, output / file_name.format(suffix))
def _add_runtime_options_to_rt_info(model: Model, options: Dict):
"""
    Add runtime options to the model's rt_info.
"""
try:
for name, value in options.items():
model.set_rt_info(value, ["runtime_options", name])
except Exception:
pass
return model
def _add_version_info_to_model(model: Model, library_name: Optional[str] = None):
"""
    Add dependency version information to the OpenVINO model's rt_info.
"""
try:
model.set_rt_info(_transformers_version, ["optimum", "transformers_version"])
model.set_rt_info(_torch_version, ["optimum", "pytorch_version"])
model.set_rt_info(_optimum_intel_version, ["optimum", "optimum_intel_version"])
model.set_rt_info(_optimum_version, ["optimum", "optimum_version"])
if any("token_embeddings" in output.get_names() for output in model.outputs):
import sentence_transformers
model.set_rt_info(sentence_transformers.__version__, ["optimum", "sentence_transformers_version"])
if library_name == "diffusers":
model.set_rt_info(_diffusers_version, ["optimum", "diffusers_version"])
elif library_name == "timm":
model.set_rt_info(_timm_version, ["optimum", "timm_version"])
elif library_name == "open_clip":
model.set_rt_info(_open_clip_version, ["optimum", "open_clip_version"])
rt_info = model.get_rt_info()
if "nncf" in rt_info:
model.set_rt_info(_nncf_version, ["optimum", "nncf_version"])
input_model = rt_info["conversion_parameters"].get("input_model", None)
if input_model is not None and "onnx" in input_model.value:
import onnx
model.set_rt_info(onnx.__version__, ["optimum", "onnx_version"])
except Exception:
pass
return model
def _get_multi_modal_submodels_and_export_configs(
model: Union["PreTrainedModel", "TFPreTrainedModel"],
task: str,
library_name: str,
int_dtype: str,
float_dtype: str,
preprocessors: Optional[List[Any]] = None,
model_kwargs: Optional[Dict] = None,
stateful: bool = True,
):
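    """
    Collect the submodels and per-behavior export configs of a multimodal text-generation model, copying
    model-specific attributes (e.g. image token ids, image newline embeddings) into the config beforehand.
    Returns the main export config, a dict mapping submodel names to `(submodel, export_config)` pairs and the
    per-submodel statefulness flags.
    """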
models_for_export = {}
stateful_parts = []
model_type = model.config.model_type.replace("_", "-")
if model_type == "internvl-chat" and preprocessors is not None:
model.config.img_context_token_id = preprocessors[0].convert_tokens_to_ids("<IMG_CONTEXT>")
if model_type == "phi3-v":
model.config.glb_GN = model.model.vision_embed_tokens.glb_GN.tolist()
model.config.sub_GN = model.model.vision_embed_tokens.sub_GN.tolist()
if model_type == "phi4mm":
model.config.glb_GN = model.model.embed_tokens_extend.image_embed.glb_GN.tolist()
model.config.sub_GN = model.model.embed_tokens_extend.image_embed.sub_GN.tolist()
model.config.num_img_tokens = model.model.embed_tokens_extend.image_embed.num_img_tokens
model.config.hd_transform_order = model.model.embed_tokens_extend.image_embed.hd_transform_order
if model.config.img_processor is None:
model.config.img_processor = model.model.embed_tokens_extend.image_embed.img_processor.config.to_dict()
if model_type == "phi4-multimodal":
model.config.glb_GN = model.model.embed_tokens_extend.image_embed.global_img_feature_extensor.tolist()
model.config.sub_GN = model.model.embed_tokens_extend.image_embed.sub_img_feature_extensor.tolist()
model.config.num_img_tokens = model.model.embed_tokens_extend.image_embed.num_img_tokens
if hasattr(model, "image_newline"):
model.config.image_newline = model.image_newline.tolist()
if hasattr(model, "model") and hasattr(model.model, "image_newline"):
model.config.image_newline = model.model.image_newline.tolist()
main_config_cls = TasksManager.get_exporter_config_constructor(
model=model, task=task, exporter="openvino", library_name=library_name
)
main_config = main_config_cls(
model.config, int_dtype=int_dtype, float_dtype=float_dtype, preprocessors=preprocessors
)
for behavior in main_config.SUPPORTED_BEHAVIORS:
model_id = f"{behavior}_model"
model_part_config = main_config.with_behavior(behavior)
model_part = main_config.get_model_for_behavior(model, behavior)
models_for_export[model_id] = (model_part, model_part_config)
stateful_parts.append(stateful if getattr(model_part_config, "use_past", False) else False)
return main_config, models_for_export, stateful_parts
def _get_submodels_and_export_configs(
model: Union["PreTrainedModel", "TFPreTrainedModel", "DiffusionPipeline"],
task: str,
monolith: bool,
custom_export_configs: Dict,
custom_architecture: bool,
_variant: str,
library_name: str,
int_dtype: str = "int64",
float_dtype: str = "fp32",
fn_get_submodels: Optional[Callable] = None,
preprocessors: Optional[List[Any]] = None,
legacy: bool = False,
model_kwargs: Optional[Dict] = None,
exporter: str = "openvino",
stateful: bool = False,
):
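    """
    OpenVINO-specific wrapper around optimum's `_get_submodels_and_export_configs` that dispatches multimodal
    text-generation and SpeechT5 models to dedicated helpers and returns per-submodel statefulness flags.
    """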
if (
not custom_architecture
and library_name == "transformers"
and model.config.model_type.replace("_", "-") in MULTI_MODAL_TEXT_GENERATION_MODELS
):
return _get_multi_modal_submodels_and_export_configs(
model, task, library_name, int_dtype, float_dtype, preprocessors, model_kwargs, stateful
)
elif not custom_architecture and library_name == "transformers" and model.config.model_type == "speecht5":
return _get_speecht5_tss_model_for_export(
model, task, library_name, int_dtype, float_dtype, preprocessors, model_kwargs
)
export_config, models_for_export = _default_get_submodels_and_export_configs(
model,
task,
monolith,
custom_export_configs,
custom_architecture,
_variant,
library_name,
int_dtype,
float_dtype,
fn_get_submodels,
preprocessors,
legacy,
model_kwargs,
exporter,
)
stateful_per_model = [stateful] * len(models_for_export)
return export_config, models_for_export, stateful_per_model
def get_diffusion_models_for_export_ext(
pipeline: "DiffusionPipeline", int_dtype: str = "int64", float_dtype: str = "fp32", exporter: str = "openvino"
):
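    """
    Collect the submodels and export configs of a diffusion pipeline, extending optimum's default helper with
    pipeline-specific runtime options and with support for SD3, Flux, Sana and LTX-Video pipelines.
    """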
is_sdxl = pipeline.__class__.__name__.startswith("StableDiffusionXL")
is_sd3 = pipeline.__class__.__name__.startswith("StableDiffusion3")
is_flux = pipeline.__class__.__name__.startswith("Flux")
is_sana = pipeline.__class__.__name__.startswith("Sana")
is_ltx_video = pipeline.__class__.__name__.startswith("LTX")
is_sd = pipeline.__class__.__name__.startswith("StableDiffusion") and not is_sd3
is_lcm = pipeline.__class__.__name__.startswith("LatentConsistencyModel")
if is_sd or is_sdxl or is_lcm:
tokenizer = pipeline.tokenizer_2 if is_sdxl else pipeline.tokenizer
model_max_length = getattr(tokenizer, "model_max_length", None)
pipeline.unet.config.model_max_length = model_max_length
models_for_export = get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)
if is_sdxl and pipeline.vae.config.force_upcast:
models_for_export["vae_encoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "128.0"}
models_for_export["vae_decoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "128.0"}
        # Only SD 2.1 has the overflow issue; it uses a different prediction_type than other models
if is_sd and pipeline.scheduler.config.prediction_type == "v_prediction":
models_for_export["vae_encoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["vae_decoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
elif is_sd3:
models_for_export = get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype)
elif is_flux:
models_for_export = get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype)
elif is_sana:
models_for_export = get_sana_models_for_export(pipeline, exporter, int_dtype, float_dtype)
elif is_ltx_video:
models_for_export = get_ltx_video_models_for_export(pipeline, exporter, int_dtype, float_dtype)
else:
raise ValueError(f"Unsupported pipeline type `{pipeline.__class__.__name__}` provided")
return None, models_for_export
def get_ltx_video_models_for_export(pipeline, exporter, int_dtype, float_dtype):
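    """Collect the text encoder, transformer and VAE encoder/decoder of an LTX-Video pipeline with their export configs."""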
models_for_export = {}
text_encoder = pipeline.text_encoder
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=text_encoder,
exporter=exporter,
library_name="diffusers",
task="feature-extraction",
model_type="t5-encoder-model",
)
export_config = export_config_constructor(
text_encoder.config,
int_dtype=int_dtype,
float_dtype=float_dtype,
)
export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["text_encoder"] = (text_encoder, export_config)
transformer = pipeline.transformer
transformer.config.vae_temporal_compression_ratio = pipeline.vae_temporal_compression_ratio
transformer.config.vae_spatial_compression_ratio = pipeline.vae_spatial_compression_ratio
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=transformer,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="ltx-video-transformer",
)
transformer_export_config = export_config_constructor(
transformer.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["transformer"] = (transformer, transformer_export_config)
# VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
vae_encoder = copy.deepcopy(pipeline.vae)
vae_encoder.forward = lambda sample: {"latent_parameters": vae_encoder.encode(x=sample)["latent_dist"].parameters}
vae_config_constructor = TasksManager.get_exporter_config_constructor(
model=vae_encoder,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="ltx-vae-encoder",
)
vae_encoder_export_config = vae_config_constructor(
vae_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
vae_encoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["vae_encoder"] = (vae_encoder, vae_encoder_export_config)
# VAE Decoder
vae_decoder = copy.deepcopy(pipeline.vae)
vae_decoder.register_to_config(
**{
"latents_mean_data": vae_decoder.latents_mean.tolist(),
"latents_std_data": vae_decoder.latents_std.tolist(),
}
)
vae_decoder.forward = lambda latent_sample, timestep=None: vae_decoder.decode(z=latent_sample, temb=timestep)
vae_config_constructor = TasksManager.get_exporter_config_constructor(
model=vae_decoder,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="ltx-vae-decoder",
)
vae_decoder_export_config = vae_config_constructor(
vae_decoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
vae_decoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["vae_decoder"] = (vae_decoder, vae_decoder_export_config)
return models_for_export
def get_sana_models_for_export(pipeline, exporter, int_dtype, float_dtype):
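    """Collect the text encoder, transformer and DC-AE encoder/decoder of a Sana pipeline with their export configs."""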
models_for_export = {}
text_encoder = pipeline.text_encoder
text_encoder_config_constructor = TasksManager.get_exporter_config_constructor(
model=text_encoder,
exporter=exporter,
library_name="diffusers",
task="feature-extraction",
model_type="gemma2-text-encoder",
)
text_encoder_export_config = text_encoder_config_constructor(
pipeline.text_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
text_encoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["text_encoder"] = (text_encoder, text_encoder_export_config)
transformer = pipeline.transformer
transformer.config.text_encoder_projection_dim = transformer.config.caption_channels
transformer.config.requires_aesthetics_score = False
transformer.config.time_cond_proj_dim = None
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=transformer,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="sana-transformer",
)
transformer_export_config = export_config_constructor(
pipeline.transformer.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["transformer"] = (transformer, transformer_export_config)
# VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
vae_encoder = copy.deepcopy(pipeline.vae)
vae_encoder.forward = lambda sample: {"latent": vae_encoder.encode(x=sample)["latent"]}
vae_config_constructor = TasksManager.get_exporter_config_constructor(
model=vae_encoder,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="dcae-encoder",
)
vae_encoder_export_config = vae_config_constructor(
vae_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
vae_encoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["vae_encoder"] = (vae_encoder, vae_encoder_export_config)
# VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600
vae_decoder = copy.deepcopy(pipeline.vae)
vae_decoder.forward = lambda latent_sample: vae_decoder.decode(z=latent_sample)
vae_config_constructor = TasksManager.get_exporter_config_constructor(
model=vae_decoder,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="dcae-decoder",
)
vae_decoder_export_config = vae_config_constructor(
vae_decoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
vae_decoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["vae_decoder"] = (vae_decoder, vae_decoder_export_config)
return models_for_export
def get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype):
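    """Collect the text encoders, transformer and VAE encoder/decoder of a Stable Diffusion 3 pipeline with their export configs."""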
models_for_export = {}
# Text encoder
text_encoder = getattr(pipeline, "text_encoder", None)
if text_encoder is not None:
text_encoder.config.output_hidden_states = True
text_encoder.text_model.config.output_hidden_states = True
text_encoder_config_constructor = TasksManager.get_exporter_config_constructor(
model=text_encoder,
exporter=exporter,
library_name="diffusers",
task="feature-extraction",
model_type="clip-text-with-projection",
)
text_encoder_export_config = text_encoder_config_constructor(
pipeline.text_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["text_encoder"] = (text_encoder, text_encoder_export_config)
transformer = pipeline.transformer
transformer.config.text_encoder_projection_dim = transformer.config.joint_attention_dim
transformer.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False)
transformer.config.time_cond_proj_dim = None
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=transformer,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="sd3-transformer",
)
transformer_export_config = export_config_constructor(
pipeline.transformer.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["transformer"] = (transformer, transformer_export_config)
# VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
vae_encoder = copy.deepcopy(pipeline.vae)
vae_encoder.forward = lambda sample: {"latent_parameters": vae_encoder.encode(x=sample)["latent_dist"].parameters}
vae_config_constructor = TasksManager.get_exporter_config_constructor(
model=vae_encoder,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="vae-encoder",
)
vae_encoder_export_config = vae_config_constructor(
vae_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["vae_encoder"] = (vae_encoder, vae_encoder_export_config)
# VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600
vae_decoder = copy.deepcopy(pipeline.vae)
vae_decoder.forward = lambda latent_sample: vae_decoder.decode(z=latent_sample)
vae_config_constructor = TasksManager.get_exporter_config_constructor(
model=vae_decoder,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="vae-decoder",
)
vae_decoder_export_config = vae_config_constructor(
vae_decoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["vae_decoder"] = (vae_decoder, vae_decoder_export_config)
text_encoder_2 = getattr(pipeline, "text_encoder_2", None)
if text_encoder_2 is not None:
text_encoder_2.config.output_hidden_states = True
text_encoder_2.text_model.config.output_hidden_states = True
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=text_encoder_2,
exporter=exporter,
library_name="diffusers",
task="feature-extraction",
model_type="clip-text-with-projection",
)
export_config = export_config_constructor(text_encoder_2.config, int_dtype=int_dtype, float_dtype=float_dtype)
models_for_export["text_encoder_2"] = (text_encoder_2, export_config)
text_encoder_3 = getattr(pipeline, "text_encoder_3", None)
if text_encoder_3 is not None:
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=text_encoder_3,
exporter=exporter,
library_name="diffusers",
task="feature-extraction",
model_type="t5-encoder-model",
)
export_config = export_config_constructor(
text_encoder_3.config,
int_dtype=int_dtype,
float_dtype=float_dtype,
)
export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["text_encoder_3"] = (text_encoder_3, export_config)
return models_for_export
def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
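    """Collect the text encoders, transformer and VAE encoder/decoder of a Flux pipeline with their export configs."""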
models_for_export = {}
# Text encoder
text_encoder = getattr(pipeline, "text_encoder", None)
if text_encoder is not None:
text_encoder_config_constructor = TasksManager.get_exporter_config_constructor(
model=text_encoder,
exporter=exporter,
library_name="diffusers",
task="feature-extraction",
model_type="clip-text-model",
)
text_encoder_export_config = text_encoder_config_constructor(
pipeline.text_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
models_for_export["text_encoder"] = (text_encoder, text_encoder_export_config)
transformer = pipeline.transformer
transformer.config.text_encoder_projection_dim = transformer.config.joint_attention_dim
transformer.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False)
transformer.config.time_cond_proj_dim = None
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=transformer,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="flux-transformer",
)
transformer_export_config = export_config_constructor(
pipeline.transformer.config, int_dtype=int_dtype, float_dtype=float_dtype
)
transformer_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["transformer"] = (transformer, transformer_export_config)
# VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
vae_encoder = copy.deepcopy(pipeline.vae)
vae_encoder.forward = lambda sample: {"latent_parameters": vae_encoder.encode(x=sample)["latent_dist"].parameters}
vae_config_constructor = TasksManager.get_exporter_config_constructor(
model=vae_encoder,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="vae-encoder",
)
vae_encoder_export_config = vae_config_constructor(
vae_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
vae_encoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["vae_encoder"] = (vae_encoder, vae_encoder_export_config)
# VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600
vae_decoder = copy.deepcopy(pipeline.vae)
vae_decoder.forward = lambda latent_sample: vae_decoder.decode(z=latent_sample)
vae_config_constructor = TasksManager.get_exporter_config_constructor(
model=vae_decoder,
exporter=exporter,
library_name="diffusers",
task="semantic-segmentation",
model_type="vae-decoder",
)
vae_decoder_export_config = vae_config_constructor(
vae_decoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
vae_decoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["vae_decoder"] = (vae_decoder, vae_decoder_export_config)
text_encoder_2 = getattr(pipeline, "text_encoder_2", None)
if text_encoder_2 is not None:
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=text_encoder_2,
exporter=exporter,
library_name="diffusers",
task="feature-extraction",
model_type="t5-encoder-model",
)
export_config = export_config_constructor(
text_encoder_2.config,
int_dtype=int_dtype,
float_dtype=float_dtype,
)
export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["text_encoder_2"] = (text_encoder_2, export_config)
return models_for_export
def _get_encoder_decoder_stateful_models_for_export(
model: Union["PreTrainedModel", "TFPreTrainedModel"],
task: str,
_variant: str,
library_name: str,
int_dtype: str = "int64",
float_dtype: str = "fp32",
preprocessors: Optional[List[Any]] = None,
):
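    """
    Split an encoder-decoder model into its encoder and a stateful decoder with their export configs.
    The decoder export config uses past key/values and is marked as stateful.
    """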
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=model, exporter="openvino", task=task, library_name=library_name
)
export_config = export_config_constructor(
model.config,
int_dtype=int_dtype,
float_dtype=float_dtype,
preprocessors=preprocessors,
legacy=False,
)
export_config.variant = _variant
all_variants = "\n".join([f" - {name}: {description}" for name, description in export_config.VARIANTS.items()])
logger.info(f"Using the export variant {export_config.variant}. Available variants are:\n{all_variants}")
models_for_export = _get_submodels_for_export_encoder_decoder(model, use_past=False)
encoder_export_config = export_config.with_behavior("encoder")
models_for_export[ENCODER_NAME] = (models_for_export[ENCODER_NAME], encoder_export_config)
decoder_export_config_with_past = export_config.with_behavior("decoder", use_past=True, use_past_in_inputs=True)
decoder_export_config_with_past.stateful = True
models_for_export[DECODER_NAME] = (
models_for_export[DECODER_NAME],
decoder_export_config_with_past,
)
return None, models_for_export
def _get_speecht5_tss_model_for_export(
model: Union["PreTrainedModel", "TFPreTrainedModel"],
task: str,
library_name: str,
int_dtype: str,
float_dtype: str,
preprocessors: Optional[List[Any]] = None,
model_kwargs: Optional[Dict] = None,
):
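    """
    Collect the encoder, decoder, postnet and vocoder submodels of a SpeechT5 text-to-speech model with their
    export configs. A vocoder checkpoint must be provided through `model_kwargs["vocoder"]`.
    """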
if model_kwargs is None or "vocoder" not in model_kwargs:
raise ValueError(
'The export of SpeechT5 requires a vocoder. Please pass `--model-kwargs \'{"vocoder": "vocoder_model_name_or_path"}\'` from the command line, or `model_kwargs={"vocoder": "vocoder_model_name_or_path"}` if calling main_export.'
)
vocoder_id = model_kwargs["vocoder"]
# prepare export config
export_config_constructor = TasksManager.get_exporter_config_constructor(
model=model, exporter="openvino", task=task, library_name=library_name
)
export_config = export_config_constructor(
model.config,
int_dtype=int_dtype,
float_dtype=float_dtype,
preprocessors=preprocessors,
legacy=False,
)
export_config.variant = "default"
models_for_export = {}
encoder_export_config = export_config.with_behavior("encoder")
decoder_export_config = export_config.with_behavior("decoder")
postnet_export_config = export_config.with_behavior("postnet")
vocoder_export_config = export_config.with_behavior("vocoder")
vocoder = SpeechT5HifiGan.from_pretrained(vocoder_id).eval()
models_for_export[ENCODER_NAME] = (model.speecht5.encoder, encoder_export_config)
models_for_export[DECODER_NAME] = (model, decoder_export_config)
models_for_export["postnet"] = (model, postnet_export_config)
models_for_export["vocoder"] = (vocoder, vocoder_export_config)
stateful_per_model = [False, True, False, False]
return export_config, models_for_export, stateful_per_model