# coding=utf-8
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Pipelines running on Neuron accelerators."""

import logging
from typing import Any, Dict, List, Optional, Union

from transformers import (
    AudioClassificationPipeline,
    AutoConfig,
    AutomaticSpeechRecognitionPipeline,
    BaseImageProcessor,
    FillMaskPipeline,
    ImageClassificationPipeline,
    ImageSegmentationPipeline,
    ObjectDetectionPipeline,
    Pipeline,
    PreTrainedModel,
    PreTrainedTokenizer,
    PreTrainedTokenizerFast,
    QuestionAnsweringPipeline,
    SequenceFeatureExtractor,
    TextClassificationPipeline,
    TextGenerationPipeline,
    TokenClassificationPipeline,
)
from transformers import pipeline as transformers_pipeline
from transformers.feature_extraction_utils import PreTrainedFeatureExtractor
from transformers.onnx.utils import get_preprocessor

from optimum.neuron.modeling_base import NeuronModel
from optimum.neuron.pipelines.transformers.sentence_transformers import (
    FeatureExtractionPipeline,
    is_sentence_transformer_model,
)

from ...configuration_utils import NeuronConfig
from ...modeling import (
    NeuronModelForAudioClassification,
    NeuronModelForCTC,
    NeuronModelForFeatureExtraction,
    NeuronModelForImageClassification,
    NeuronModelForMaskedLM,
    NeuronModelForObjectDetection,
    NeuronModelForQuestionAnswering,
    NeuronModelForSemanticSegmentation,
    NeuronModelForSentenceTransformers,
    NeuronModelForSequenceClassification,
    NeuronModelForTokenClassification,
)
from ...modeling_decoder import NeuronModelForCausalLM


logger = logging.getLogger(__name__)


NEURONX_SUPPORTED_TASKS = {
    "feature-extraction": {
        "impl": FeatureExtractionPipeline,
        "class": (NeuronModelForFeatureExtraction,),
        "default": "distilbert-base-cased",
        "type": "text",  # feature extraction is only supported for text at the moment
    },
    "fill-mask": {
        "impl": FillMaskPipeline,
        "class": (NeuronModelForMaskedLM,),
        "default": "bert-base-cased",
        "type": "text",
    },
    "question-answering": {
        "impl": QuestionAnsweringPipeline,
        "class": (NeuronModelForQuestionAnswering,),
        "default": "distilbert-base-cased-distilled-squad",
        "type": "text",
    },
    "text-classification": {
        "impl": TextClassificationPipeline,
        "class": (NeuronModelForSequenceClassification,),
        "default": "distilbert-base-uncased-finetuned-sst-2-english",
        "type": "text",
    },
    "token-classification": {
        "impl": TokenClassificationPipeline,
        "class": (NeuronModelForTokenClassification,),
        "default": "dbmdz/bert-large-cased-finetuned-conll03-english",
        "type": "text",
    },
    "text-generation": {
        "impl": TextGenerationPipeline,
        "class": (NeuronModelForCausalLM,),
        "default": "Qwen/Qwen2.5-0.5B-Instruct",
        "type": "text",
    },
    "image-classification": {
        "impl": ImageClassificationPipeline,
        "class": (NeuronModelForImageClassification,),
        "default": "microsoft/beit-base-patch16-224-pt22k-ft22k",
        "type": "image",
    },
    "image-segmentation": {
        "impl": ImageSegmentationPipeline,
        "class": (NeuronModelForSemanticSegmentation,),
        "default": "apple/deeplabv3-mobilevit-small",
        "type": "image",
    },
    "object-detection": {
        "impl": ObjectDetectionPipeline,
        "class": (NeuronModelForObjectDetection,),
        "default": "hustvl/yolos-tiny",
        "type": "image",
    },
    "automatic-speech-recognition": {
        "impl": AutomaticSpeechRecognitionPipeline,
        "class": (NeuronModelForCTC,),
        "default": "facebook/wav2vec2-large-960h-lv60-self",
        "type": "audio",
    },
    "audio-classification": {
        "impl": AudioClassificationPipeline,
        "class": (NeuronModelForAudioClassification,),
        "default": "facebook/wav2vec2-large-960h-lv60-self",
        "type": "audio",
    },
}


def check_model_type(self, supported_models: Union[List[str], dict]):
    """
    Dummy function to avoid the error logs raised by
    https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/pipelines/base.py#L1091
    """
    pass


def load_pipeline(
    model,
    targeted_task,
    tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]],
    feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]],
    image_processor: Optional[Union[str, BaseImageProcessor]],
    load_tokenizer: bool,
    load_feature_extractor: bool,
    load_image_processor: bool,
    supported_tasks=NEURONX_SUPPORTED_TASKS,
    input_shapes={},
    export=False,
    subfolder: str = "",
    token: Optional[Union[bool, str]] = None,
    revision: str = "main",
    compiler_args: Optional[Dict[str, Any]] = {},
    hub_kwargs: Optional[Dict[str, Any]] = {},
    **kwargs,
):
    """
    Resolve `model` into a `NeuronModel` (from the task's default checkpoint, a hub
    model id, or an already-loaded instance) together with its preprocessors.
    """
    # loads default model
    if model is None:
        model_id = supported_tasks[targeted_task]["default"]
        model = supported_tasks[targeted_task]["class"][0].from_pretrained(
            model_id, export=True, **compiler_args, **input_shapes, **hub_kwargs, **kwargs
        )
    # loads model from model id and optionally converts it to neuronx
    elif isinstance(model, str):
        model_id = model
        neuronx_model_class = supported_tasks[targeted_task]["class"][0]
        # Try to determine the correct feature extraction class to use.
        if targeted_task == "feature-extraction" and is_sentence_transformer_model(
            model, token=token, revision=revision
        ):
            logger.info("Using Sentence Transformers compatible Feature extraction pipeline")
            neuronx_model_class = NeuronModelForSentenceTransformers
        model = neuronx_model_class.from_pretrained(
            model, export=export, **compiler_args, **input_shapes, **hub_kwargs, **kwargs
        )
    # uses neuron model
    elif isinstance(model, NeuronModel):
        if tokenizer is None and load_tokenizer:
            for preprocessor in model.preprocessors:
                if isinstance(preprocessor, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
                    tokenizer = preprocessor
                    break
            if tokenizer is None:
                raise ValueError(
                    "Could not automatically find a tokenizer for the NeuronModel, you must pass a tokenizer explicitly"
                )
        if feature_extractor is None and load_feature_extractor:
            for preprocessor in model.preprocessors:
                if isinstance(preprocessor, SequenceFeatureExtractor):
                    feature_extractor = preprocessor
                    break
            if feature_extractor is None:
                raise ValueError(
                    "Could not automatically find a feature extractor for the NeuronModel, you must pass a "
                    "feature_extractor explicitly"
                )
        if image_processor is None and load_image_processor:
            for preprocessor in model.preprocessors:
                if isinstance(preprocessor, BaseImageProcessor):
                    image_processor = preprocessor
                    break
            if image_processor is None:
                raise ValueError(
                    "Could not automatically find an image_processor for the NeuronModel, you must pass an image processor explicitly"
                )
        model_id = None
    else:
        raise ValueError(
            f"Model {model} is not supported. Please provide a valid model, either as a string or a NeuronModel. "
            "You can also provide no model, in which case a default one will be used for the task."
        )

    return model, model_id, tokenizer, feature_extractor, image_processor
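
# Illustrative sketch (not part of the original module): the helper below is
# hypothetical and only mirrors the `model is None` branch of `load_pipeline`
# above, showing how a task's default checkpoint is resolved and exported.
# The shape values are the same defaults that `pipeline` falls back to.
def _load_default_model_example(task: str = "fill-mask"):
    model_id = NEURONX_SUPPORTED_TASKS[task]["default"]
    model_class = NEURONX_SUPPORTED_TASKS[task]["class"][0]
    # Exporting on the fly requires static input shapes for the Neuron compiler.
    return model_class.from_pretrained(model_id, export=True, batch_size=1, sequence_length=128)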
def pipeline(
    task: str = None,
    model: Optional[Union[str, NeuronModel]] = None,
    tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
    feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None,
    image_processor: Optional[Union[str, BaseImageProcessor]] = None,
    use_fast: bool = True,
    export: bool = False,
    input_shapes: Optional[Dict[str, int]] = {},
    compiler_args: Optional[Dict[str, int]] = {},
    token: Optional[Union[str, bool]] = None,
    revision: Optional[str] = None,
    trust_remote_code: Optional[bool] = None,
    **kwargs,
) -> Pipeline:
    """
    Build a `transformers` pipeline backed by a Neuron model for one of the tasks in
    `NEURONX_SUPPORTED_TASKS`, optionally exporting the model to the Neuron format first.
    """
    if task not in NEURONX_SUPPORTED_TASKS:
        raise ValueError(
            f"Task {task} is not supported for the optimum neuron pipeline. Supported tasks are {list(NEURONX_SUPPORTED_TASKS.keys())}"
        )

    # copied from transformers.pipelines.__init__.py
    commit_hash = kwargs.pop("_commit_hash", None)

    hub_kwargs = {
        "revision": revision,
        "token": token,
        "trust_remote_code": trust_remote_code,
        "_commit_hash": commit_hash,
    }

    config = kwargs.get("config", None)
    if config is None:
        if isinstance(model, str):
            config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **kwargs)
            hub_kwargs["_commit_hash"] = config._commit_hash
        elif isinstance(model, (PreTrainedModel, NeuronModel)):
            if hasattr(model, "encoder"):
                config = model.encoder.config
            else:
                config = model.config

    neuron_config = getattr(config, "neuron", None)
    if neuron_config is None:
        if isinstance(model, str):
            try:
                neuron_config = NeuronConfig.from_pretrained(model, token=token, revision=revision)
            except EnvironmentError:
                # If the model is not a Neuron model, we will just ignore the error
                pass
        elif isinstance(model, NeuronModel):
            neuron_config = getattr(model, "neuron_config", None)

    if export:
        if neuron_config is not None:
            raise ValueError("This model has already been exported to Neuron format")
        if not input_shapes:
            input_shapes = {"batch_size": 1, "sequence_length": 128}
            logger.warning(f"No input shapes provided, using default shapes, {input_shapes}")
    else:
        if neuron_config is None:
            raise ValueError("The model must be exported to Neuron format first")
        if input_shapes:
            logger.warning("Input shapes can only be set during export")

    no_feature_extractor_tasks = set()
    no_tokenizer_tasks = set()
    no_image_processor_tasks = set()
    for _task, values in NEURONX_SUPPORTED_TASKS.items():
        if values["type"] == "text":
            no_feature_extractor_tasks.add(_task)
            no_image_processor_tasks.add(_task)
        elif values["type"] in {"image", "video"}:
            no_tokenizer_tasks.add(_task)
            no_feature_extractor_tasks.add(_task)
        elif values["type"] in {"audio"}:
            no_tokenizer_tasks.add(_task)
            no_image_processor_tasks.add(_task)
        elif values["type"] not in ["multimodal", "audio", "video"]:
            raise ValueError(f"SUPPORTED_TASK {_task} contains invalid type {values['type']}")

    # copied from transformers.pipelines.__init__.py l.609
    if task in no_tokenizer_tasks:
        # These will never require a tokenizer.
        # The model, on the other hand, might have a tokenizer, but
        # the files could be missing from the hub; instead of failing
        # on such repos, we just force to not load it.
        load_tokenizer = False
    else:
        load_tokenizer = True
    if task in no_feature_extractor_tasks:
        load_feature_extractor = False
    else:
        load_feature_extractor = True
    if task in no_image_processor_tasks:
        load_image_processor = False
    else:
        load_image_processor = True

    model, model_id, tokenizer, feature_extractor, image_processor = load_pipeline(
        model=model,
        targeted_task=task,
        tokenizer=tokenizer,
        feature_extractor=feature_extractor,
        image_processor=image_processor,
        load_tokenizer=load_tokenizer,
        load_feature_extractor=load_feature_extractor,
        load_image_processor=load_image_processor,
        export=export,
        input_shapes=input_shapes,
        compiler_args=compiler_args,
        supported_tasks=NEURONX_SUPPORTED_TASKS,
        hub_kwargs=hub_kwargs,
        token=token,
    )

    if tokenizer is None and load_tokenizer:
        tokenizer = get_preprocessor(model_id)
    if feature_extractor is None and load_feature_extractor:
        feature_extractor = get_preprocessor(model_id)
    if image_processor is None and load_image_processor:
        image_processor = get_preprocessor(model_id)

    # If we don't specify a batch_size, the pipeline will assume batch_size 1
    # and it will process the inputs one by one instead of processing them in parallel
    batch_size = 1
    neuron_config = (
        getattr(config, "neuron", None)
        or getattr(model.config, "neuron", None)
        or getattr(model, "neuron_config", None)
    )
    if isinstance(neuron_config, NeuronConfig):
        batch_size = neuron_config.batch_size
    elif isinstance(neuron_config, dict):
        for attr in ["batch_size", "static_batch_size"]:
            batch_size = neuron_config.get(attr, batch_size)

    if batch_size > 1 and tokenizer is not None and tokenizer.pad_token_id is None:
        # The pipeline needs a pad token to be able to batch
        if isinstance(model.config.eos_token_id, list):
            tokenizer.pad_token_id = model.config.eos_token_id[0]
        else:
            tokenizer.pad_token_id = model.config.eos_token_id

    if hasattr(NEURONX_SUPPORTED_TASKS[task]["impl"], "check_model_type"):
        NEURONX_SUPPORTED_TASKS[task]["impl"].check_model_type = check_model_type

    return transformers_pipeline(
        task,
        model=model,
        tokenizer=tokenizer,
        feature_extractor=feature_extractor,
        image_processor=image_processor,
        use_fast=use_fast,
        batch_size=batch_size,
        pipeline_class=NEURONX_SUPPORTED_TASKS[task]["impl"],
        device=model.device,
        **kwargs,
    )
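
# Minimal usage sketch (not part of the original module), assuming a Neuron
# device is available: `export=True` compiles the checkpoint on the fly with
# the default shapes ({"batch_size": 1, "sequence_length": 128}) warned about
# in `pipeline` above. The checkpoint is the task's default entry from
# NEURONX_SUPPORTED_TASKS.
if __name__ == "__main__":
    classifier = pipeline(
        "text-classification",
        model="distilbert-base-uncased-finetuned-sst-2-english",
        export=True,
    )
    print(classifier("Neuron pipelines keep the standard transformers API."))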