optimum_benchmark/task_utils.py (266 lines of code) (raw):

"""Helpers to infer the library, task and model type of a model repository.

A "repository" here is either a Hugging Face Hub repo id or a local directory;
see `is_hf_hub_repo` and `is_local_dir_repo`.
"""

import json
import os
from typing import Optional, Tuple

from .hub_utils import HF_API
from .import_utils import is_diffusers_available, is_torch_available, is_transformers_available

# Task name -> name of the `transformers` auto class (or a tuple of names when
# several auto classes serve the same task).
TASKS_TO_AUTO_MODEL_CLASS_NAMES = {
    # text processing
    "feature-extraction": "AutoModel",
    "fill-mask": "AutoModelForMaskedLM",
    "multiple-choice": "AutoModelForMultipleChoice",
    "question-answering": "AutoModelForQuestionAnswering",
    "token-classification": "AutoModelForTokenClassification",
    "text-classification": "AutoModelForSequenceClassification",
    # audio processing
    "audio-xvector": "AutoModelForAudioXVector",
    "text-to-audio": "AutoModelForTextToSpectrogram",
    "audio-classification": "AutoModelForAudioClassification",
    "audio-frame-classification": "AutoModelForAudioFrameClassification",
    # image processing
    "mask-generation": "AutoModel",
    "image-to-image": "AutoModelForImageToImage",
    "masked-im": "AutoModelForMaskedImageModeling",
    "object-detection": "AutoModelForObjectDetection",
    "depth-estimation": "AutoModelForDepthEstimation",
    "image-segmentation": "AutoModelForImageSegmentation",
    "image-classification": "AutoModelForImageClassification",
    "semantic-segmentation": "AutoModelForSemanticSegmentation",
    "zero-shot-object-detection": "AutoModelForZeroShotObjectDetection",
    "zero-shot-image-classification": "AutoModelForZeroShotImageClassification",
    # text generation
    "image-to-text": "AutoModelForVision2Seq",
    "text-generation": "AutoModelForCausalLM",
    "text2text-generation": "AutoModelForSeq2SeqLM",
    "image-text-to-text": "AutoModelForImageTextToText",
    "visual-question-answering": "AutoModelForVisualQuestionAnswering",
    "automatic-speech-recognition": ("AutoModelForSpeechSeq2Seq", "AutoModelForCTC"),
}

# Task name -> name of the `diffusers` auto pipeline class.
TASKS_TO_AUTO_PIPELINE_CLASS_NAMES = {
    "inpainting": "AutoPipelineForInpainting",
    "text-to-image": "AutoPipelineForText2Image",
    "image-to-image": "AutoPipelineForImage2Image",
}

# Task name -> mapping taken from the corresponding auto class(es).
# Stays empty when `transformers` or `torch` is not available.
TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES = {}

if is_transformers_available() and is_torch_available():
    import transformers

    for task_name, auto_model_class_names in TASKS_TO_AUTO_MODEL_CLASS_NAMES.items():
        TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES[task_name] = {}

        # Normalise the single-name form so both shapes share one code path.
        if isinstance(auto_model_class_names, str):
            auto_model_class_names = (auto_model_class_names,)

        for auto_model_class_name in auto_model_class_names:
            # Auto classes missing from the installed `transformers` are skipped.
            auto_model_class = getattr(transformers, auto_model_class_name, None)
            if auto_model_class is not None:
                # NOTE(review): relies on private attributes of the auto class;
                # presumably a model-type -> class-name mapping - confirm when
                # bumping `transformers`.
                TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES[task_name].update(
                    auto_model_class._model_mapping._model_mapping
                )

# Task name -> {pipeline type: pipeline class name}.
# Stays empty when `diffusers` (or its `auto_pipeline` module) is not available.
TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES = {}

if is_diffusers_available():
    import diffusers

    if hasattr(diffusers, "pipelines") and hasattr(diffusers.pipelines, "auto_pipeline"):
        from diffusers.pipelines.auto_pipeline import (
            AUTO_IMAGE2IMAGE_PIPELINES_MAPPING,
            AUTO_INPAINT_PIPELINES_MAPPING,
            AUTO_TEXT2IMAGE_PIPELINES_MAPPING,
        )

        # Copies are taken so the rewrite below does not touch diffusers' own mappings.
        TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES = {
            "inpainting": AUTO_INPAINT_PIPELINES_MAPPING.copy(),
            "text-to-image": AUTO_TEXT2IMAGE_PIPELINES_MAPPING.copy(),
            "image-to-image": AUTO_IMAGE2IMAGE_PIPELINES_MAPPING.copy(),
        }

        for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items():
            for pipeline_type, pipeline_class in pipeline_mapping.items():
                # diffusers does not have mappings with just class names, so the
                # classes are replaced by their names (existing keys only, which
                # is safe while iterating).
                TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES[task_name][pipeline_type] = pipeline_class.__name__

IMAGE_DIFFUSION_TASKS = [
    "inpainting",
    "text-to-image",
    "image-to-image",
]

TEXT_GENERATION_TASKS = [
    "image-to-text",
    "conversational",
    "text-generation",
    "image-text-to-text",
    "text2text-generation",
    "automatic-speech-recognition",
]

TEXT_EMBEDDING_TASKS = [
    "feature-extraction",
    "sentence-similarity",
]

# Alternative task name -> canonical task name.
SYNONYM_TASKS = {
    "masked-lm": "fill-mask",
    "causal-lm": "text-generation",
    "default": "feature-extraction",
    "vision2seq-lm": "image-to-text",
    "text-to-speech": "text-to-audio",
    "seq2seq-lm": "text2text-generation",
    "translation": "text2text-generation",
    "summarization": "text2text-generation",
    "mask-generation": "feature-extraction",
    "audio-ctc": "automatic-speech-recognition",
    "sentence-similarity": "feature-extraction",
    "speech2seq-lm": "automatic-speech-recognition",
    "sequence-classification": "text-classification",
    "zero-shot-classification": "text-classification",
}

# Alternative library name -> canonical library name.
SYNONYM_LIBRARIES = {
    "sentence-transformers": "transformers",
}


def map_from_synonym_task(task: str) -> str:
    """Return the canonical name for `task`, or `task` itself if it has no synonym entry."""
    return SYNONYM_TASKS.get(task, task)


def map_from_synonym_library(library: str) -> str:
    """Return the canonical name for `library`, or `library` itself if it has no synonym entry."""
    return SYNONYM_LIBRARIES.get(library, library)


def is_hf_hub_repo(model_name_or_path: str, token: Optional[str] = None) -> bool:
    """Return whether `HF_API.repo_exists` reports `model_name_or_path` as an existing repo.

    Any exception raised by the lookup is treated as "not a hub repo", which
    lets callers fall back to checking for a local directory.
    """
    try:
        return HF_API.repo_exists(model_name_or_path, token=token)
    except Exception:
        # Deliberate best-effort: a failed lookup must not abort inference.
        return False


def is_local_dir_repo(model_name_or_path: str) -> bool:
    """Return whether `model_name_or_path` is an existing local directory."""
    return os.path.isdir(model_name_or_path)


def get_repo_config(
    model_name_or_path: str,
    config_name: str,
    token: Optional[str] = None,
    revision: Optional[str] = None,
    cache_dir: Optional[str] = None,
):
    """Load the JSON file `config_name` from a hub repo or a local directory.

    The hub is checked first; a hub repo has the file downloaded through
    `HF_API.hf_hub_download`, a local directory has it read in place.

    Args:
        model_name_or_path: Hub repo id or path to a local directory.
        config_name: File name of the JSON config inside the repo/directory.
        token: Forwarded to the hub calls.
        revision: Forwarded to the hub download.
        cache_dir: Forwarded to the hub download.

    Returns:
        The parsed JSON content.

    Raises:
        KeyError: If `model_name_or_path` is neither a hub repo nor a local directory.
    """
    if is_hf_hub_repo(model_name_or_path, token=token):
        config_path = HF_API.hf_hub_download(
            repo_id=model_name_or_path,
            filename=config_name,
            cache_dir=cache_dir,
            revision=revision,
            token=token,
        )
    elif is_local_dir_repo(model_name_or_path):
        config_path = os.path.join(model_name_or_path, config_name)
    else:
        raise KeyError(f"`{model_name_or_path}` is neither an hf hub repo nor a local directory.")

    # The context manager closes the handle even if parsing fails, and the
    # explicit encoding keeps parsing independent of the platform locale.
    with open(config_path, mode="r", encoding="utf-8") as config_file:
        return json.load(config_file)


def get_repo_files(model_name_or_path: str, token: Optional[str] = None, revision: Optional[str] = None):
    """List the files of a hub repo or the entries of a local directory.

    Raises:
        KeyError: If `model_name_or_path` is neither a hub repo nor a local directory.
    """
    if is_hf_hub_repo(model_name_or_path, token=token):
        return HF_API.list_repo_files(model_name_or_path, revision=revision, token=token)

    if is_local_dir_repo(model_name_or_path):
        return os.listdir(model_name_or_path)

    raise KeyError(f"`{model_name_or_path}` is neither an hf hub repo nor a local directory.")


def infer_library_from_model_name_or_path(
    model_name_or_path: str,
    token: Optional[str] = None,
    revision: Optional[str] = None,
    cache_dir: Optional[str] = None,
) -> str:
    """Infer the library a repository belongs to from the files it contains.

    Checks are made in this order, first hit wins: `model_index.json`
    (diffusers), `config_sentence_transformers.json` (sentence-transformers),
    `config.json` (timm when it has a `pretrained_cfg` key, transformers
    otherwise), any `.gguf`/`.GGUF` file (llama_cpp).

    Returns:
        The library name after `map_from_synonym_library` has been applied.

    Raises:
        KeyError: If none of the checks matches, or if the repository cannot be
            resolved (see `get_repo_files`).
    """
    inferred_library_name = None

    repo_files = get_repo_files(model_name_or_path, token=token, revision=revision)

    if "model_index.json" in repo_files:
        inferred_library_name = "diffusers"
    elif "config_sentence_transformers.json" in repo_files:
        inferred_library_name = "sentence-transformers"
    elif "config.json" in repo_files:
        config_dict = get_repo_config(
            model_name_or_path, "config.json", token=token, revision=revision, cache_dir=cache_dir
        )
        if "pretrained_cfg" in config_dict:
            inferred_library_name = "timm"
        else:
            inferred_library_name = "transformers"
    elif any(file.endswith((".gguf", ".GGUF")) for file in repo_files):
        inferred_library_name = "llama_cpp"

    if inferred_library_name is None:
        raise KeyError(f"Could not find the proper library name for directory {model_name_or_path}.")

    return map_from_synonym_library(inferred_library_name)


def _find_task_for_model_class(target_class_name: str) -> Optional[str]:
    """Return the first task whose model mapping has `target_class_name` as a value, else None."""
    for task_name, model_mapping in TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES.items():
        if target_class_name in model_mapping.values():
            return task_name
    return None


def _find_pipeline_match(target_class_name: str) -> Tuple[Optional[str], Optional[str]]:
    """Return `(task_name, pipeline_type)` of the first matching pipeline class, else `(None, None)`.

    A pipeline class matches when its name is contained in `target_class_name`.
    """
    for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items():
        for pipeline_type, pipeline_class_name in pipeline_mapping.items():
            # The substring test also covers exact equality of the two names.
            if pipeline_class_name in target_class_name:
                return task_name, pipeline_type
    return None, None


def infer_task_from_model_name_or_path(
    model_name_or_path: str,
    token: Optional[str] = None,
    revision: Optional[str] = None,
    cache_dir: Optional[str] = None,
    library_name: Optional[str] = None,
) -> str:
    """Infer the task of a repository, inferring its library first when not given.

    - llama_cpp: always "text-generation".
    - timm: always "image-classification".
    - transformers: the first entry of `architectures` in `config.json` is
      looked up in `TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES`.
    - diffusers: `_class_name` in `model_index.json` is matched against
      `TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES`.

    Returns:
        The task name after `map_from_synonym_task` has been applied. For any
        other explicitly passed `library_name` no inference is attempted and
        the result is `None`.

    Raises:
        KeyError: If the transformers/diffusers class name matches no known
            task, or if the repository or its library cannot be resolved.
    """
    inferred_task_name = None

    if library_name is None:
        library_name = infer_library_from_model_name_or_path(
            model_name_or_path, revision=revision, token=token, cache_dir=cache_dir
        )

    if library_name == "llama_cpp":
        inferred_task_name = "text-generation"

    elif library_name == "timm":
        inferred_task_name = "image-classification"

    elif library_name == "transformers":
        transformers_config = get_repo_config(
            model_name_or_path, "config.json", token=token, revision=revision, cache_dir=cache_dir
        )
        target_class_name = transformers_config["architectures"][0]
        inferred_task_name = _find_task_for_model_class(target_class_name)

        if inferred_task_name is None:
            raise KeyError(f"Could not find the proper task name for target class name {target_class_name}.")

    elif library_name == "diffusers":
        diffusers_config = get_repo_config(
            model_name_or_path, "model_index.json", token=token, revision=revision, cache_dir=cache_dir
        )
        target_class_name = diffusers_config["_class_name"]
        inferred_task_name, _ = _find_pipeline_match(target_class_name)

        if inferred_task_name is None:
            raise KeyError(f"Could not find the proper task name for target class name {target_class_name}.")

    return map_from_synonym_task(inferred_task_name)


def infer_model_type_from_model_name_or_path(
    model_name_or_path: str,
    token: Optional[str] = None,
    revision: Optional[str] = None,
    cache_dir: Optional[str] = None,
    library_name: Optional[str] = None,
) -> str:
    """Infer the model type of a repository, inferring its library first when not given.

    - llama_cpp: always "llama_cpp".
    - timm: the `architecture` value of `config.json`.
    - transformers: the `model_type` value of `config.json`.
    - diffusers: the pipeline type of the first pipeline class matching
      `_class_name` in `model_index.json`; when nothing matches, the class name
      with "DiffusionPipeline" and "Pipeline" removed.

    Returns:
        The inferred model type, or `None` for any other explicitly passed
        `library_name`.

    Raises:
        KeyError: If the repository or its library cannot be resolved, or an
            expected config key is missing.
    """
    inferred_model_type = None

    if library_name is None:
        library_name = infer_library_from_model_name_or_path(
            model_name_or_path, revision=revision, token=token, cache_dir=cache_dir
        )

    if library_name == "llama_cpp":
        inferred_model_type = "llama_cpp"

    elif library_name == "timm":
        timm_config = get_repo_config(
            model_name_or_path, "config.json", token=token, revision=revision, cache_dir=cache_dir
        )
        inferred_model_type = timm_config["architecture"]

    elif library_name == "transformers":
        transformers_config = get_repo_config(
            model_name_or_path, "config.json", token=token, revision=revision, cache_dir=cache_dir
        )
        inferred_model_type = transformers_config["model_type"]

    elif library_name == "diffusers":
        diffusers_config = get_repo_config(
            model_name_or_path, "model_index.json", token=token, revision=revision, cache_dir=cache_dir
        )
        target_class_name = diffusers_config["_class_name"]
        _, inferred_model_type = _find_pipeline_match(target_class_name)

        if inferred_model_type is None:
            # we use the class name in this case
            inferred_model_type = target_class_name.replace("DiffusionPipeline", "").replace("Pipeline", "")

    return inferred_model_type