in optimum/exporters/openvino/model_configs.py
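# Imports this function relies on (a sketch: `deepcopy` and `TasksManager` are
# standard; the exact home of `is_diffusers_available` in optimum-intel is an
# assumption here):
from copy import deepcopy

from optimum.exporters.tasks import TasksManager
from optimum.intel.utils.import_utils import is_diffusers_available
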
def init_model_configs():
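    """Register OpenVINO support with optimum's TasksManager: add custom model
    loaders for multimodal model types and mirror every ONNX export config
    under an "openvino" key."""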
if "open_clip" not in TasksManager._LIBRARY_TO_SUPPORTED_MODEL_TYPES:
TasksManager._LIBRARY_TO_SUPPORTED_MODEL_TYPES["open_clip"] = {}
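
    # Each (framework, model_type, task) triple below tells TasksManager which
    # transformers class to load the PyTorch model with before export; model
    # types without a dedicated *ForConditionalGeneration class fall back to a
    # generic Auto* loader.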
TasksManager._CUSTOM_CLASSES[("pt", "llava", "image-text-to-text")] = (
"transformers",
"LlavaForConditionalGeneration",
)
TasksManager._CUSTOM_CLASSES[("pt", "llava-next", "image-text-to-text")] = (
"transformers",
"LlavaNextForConditionalGeneration",
)
TasksManager._CUSTOM_CLASSES[("pt", "qwen2-vl", "image-text-to-text")] = (
"transformers",
"Qwen2VLForConditionalGeneration",
)
TasksManager._CUSTOM_CLASSES[("pt", "qwen2-5-vl", "image-text-to-text")] = (
"transformers",
"AutoModelForImageTextToText",
)
TasksManager._CUSTOM_CLASSES[("pt", "llava-next-video", "image-text-to-text")] = (
"transformers",
"AutoModelForVision2Seq",
)
TasksManager._CUSTOM_CLASSES[("pt", "gemma3", "image-text-to-text")] = (
"transformers",
"Gemma3ForConditionalGeneration",
)
TasksManager._CUSTOM_CLASSES[("pt", "idefics3", "image-text-to-text")] = (
"transformers",
"AutoModelForImageTextToText",
)
TasksManager._CUSTOM_CLASSES[("pt", "smolvlm", "image-text-to-text")] = (
"transformers",
"AutoModelForImageTextToText",
)
TasksManager._CUSTOM_CLASSES[("pt", "phi4mm", "image-text-to-text")] = ("transformers", "AutoModelForCausalLM")
TasksManager._CUSTOM_CLASSES[("pt", "phi4mm", "automatic-speech-recognition")] = (
"transformers",
"AutoModelForCausalLM",
)
TasksManager._CUSTOM_CLASSES[("pt", "phi4-multimodal", "image-text-to-text")] = (
"transformers",
"AutoModelForCausalLM",
)
TasksManager._CUSTOM_CLASSES[("pt", "phi4-multimodal", "automatic-speech-recognition")] = (
"transformers",
"AutoModelForCausalLM",
)
TasksManager._CUSTOM_CLASSES[("pt", "llama4", "image-text-to-text")] = (
"transformers",
"AutoModelForImageTextToText",
)
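
    # Route image-text-to-text models that have no custom class above through
    # the same loader as text-generation, and video-text-to-text through
    # AutoModelForVision2Seq.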
    TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS[
        "image-text-to-text"
    ] = TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS["text-generation"]
    TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS["video-text-to-text"] = "AutoModelForVision2Seq"
if is_diffusers_available() and "fill" not in TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS:
TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS["fill"] = "FluxFillPipeline"
TasksManager._DIFFUSERS_TASKS_TO_MODEL_MAPPINGS["fill"] = {"flux": "FluxFillPipeline"}
TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS["text-to-image"] = ("AutoPipelineForText2Image", "SanaPipeline")
TasksManager._DIFFUSERS_TASKS_TO_MODEL_MAPPINGS["text-to-image"]["sana"] = "SanaPipeline"
TasksManager._DIFFUSERS_TASKS_TO_MODEL_MAPPINGS["text-to-image"]["sana-sprint"] = "SanaSprintPipeline"
if is_diffusers_available() and "text-to-video" not in TasksManager._DIFFUSERS_TASKS_TO_MODEL_MAPPINGS:
TasksManager._DIFFUSERS_TASKS_TO_MODEL_MAPPINGS["text-to-video"] = {}
TasksManager._DIFFUSERS_TASKS_TO_MODEL_MAPPINGS["text-to-video"]["ltx-video"] = "LTXPipeline"
    supported_model_types = [
        "_SUPPORTED_MODEL_TYPE",
        "_DIFFUSERS_SUPPORTED_MODEL_TYPE",
        "_TIMM_SUPPORTED_MODEL_TYPE",
        "_SENTENCE_TRANSFORMERS_SUPPORTED_MODEL_TYPE",
    ]

    for supported_models_config in supported_model_types:
        supported_models = getattr(TasksManager, supported_models_config)
        for model, export_configs in supported_models.items():
            if "onnx" not in export_configs:
                continue
            onnx_config = export_configs["onnx"]
            supported_models[model]["openvino"] = deepcopy(onnx_config)
        setattr(TasksManager, supported_models_config, supported_models)
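
# Illustrative usage sketch, not part of the original module: after calling
# init_model_configs(), each ONNX export config is mirrored under an "openvino"
# key and the custom multimodal loaders above are registered. The "bert" lookup
# below is an assumption chosen only for demonstration.
#
#   init_model_configs()
#   assert "openvino" in TasksManager._SUPPORTED_MODEL_TYPE["bert"]
#   assert TasksManager._CUSTOM_CLASSES[("pt", "llava", "image-text-to-text")] == (
#       "transformers",
#       "LlavaForConditionalGeneration",
#   )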