optimum/neuron/pipelines/transformers/base.py
# coding=utf-8
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Pipelines running different Neuron Accelerators."""
import logging
from typing import Any, Dict, List, Optional, Union
from transformers import (
AudioClassificationPipeline,
AutoConfig,
AutomaticSpeechRecognitionPipeline,
BaseImageProcessor,
FillMaskPipeline,
ImageClassificationPipeline,
ImageSegmentationPipeline,
ObjectDetectionPipeline,
Pipeline,
PreTrainedModel,
PreTrainedTokenizer,
PreTrainedTokenizerFast,
QuestionAnsweringPipeline,
SequenceFeatureExtractor,
TextClassificationPipeline,
TextGenerationPipeline,
TokenClassificationPipeline,
)
from transformers import pipeline as transformers_pipeline
from transformers.feature_extraction_utils import PreTrainedFeatureExtractor
from transformers.onnx.utils import get_preprocessor

from optimum.neuron.modeling_base import NeuronModel
from optimum.neuron.pipelines.transformers.sentence_transformers import (
FeatureExtractionPipeline,
is_sentence_transformer_model,
)

from ...configuration_utils import NeuronConfig
from ...modeling import (
NeuronModelForAudioClassification,
NeuronModelForCTC,
NeuronModelForFeatureExtraction,
NeuronModelForImageClassification,
    NeuronModelForMaskedLM,
    NeuronModelForObjectDetection,
NeuronModelForQuestionAnswering,
NeuronModelForSemanticSegmentation,
NeuronModelForSentenceTransformers,
NeuronModelForSequenceClassification,
NeuronModelForTokenClassification,
)
from ...modeling_decoder import NeuronModelForCausalLM

logger = logging.getLogger(__name__)

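# Maps each supported task to its pipeline implementation, the Neuron model
# class that backs it, the default checkpoint used when no model is given, and
# the input modality ("text", "image" or "audio") that determines which
# preprocessors to load.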
NEURONX_SUPPORTED_TASKS = {
"feature-extraction": {
"impl": FeatureExtractionPipeline,
"class": (NeuronModelForFeatureExtraction,),
"default": "distilbert-base-cased",
"type": "text", # feature extraction is only supported for text at the moment
},
"fill-mask": {
"impl": FillMaskPipeline,
"class": (NeuronModelForMaskedLM,),
"default": "bert-base-cased",
"type": "text",
},
"question-answering": {
"impl": QuestionAnsweringPipeline,
"class": (NeuronModelForQuestionAnswering,),
"default": "distilbert-base-cased-distilled-squad",
"type": "text",
},
"text-classification": {
"impl": TextClassificationPipeline,
"class": (NeuronModelForSequenceClassification,),
"default": "distilbert-base-uncased-finetuned-sst-2-english",
"type": "text",
},
"token-classification": {
"impl": TokenClassificationPipeline,
"class": (NeuronModelForTokenClassification,),
"default": "dbmdz/bert-large-cased-finetuned-conll03-english",
"type": "text",
},
"text-generation": {
"impl": TextGenerationPipeline,
"class": (NeuronModelForCausalLM,),
"default": "Qwen/Qwen2.5-0.5B-Instruct",
"type": "text",
},
"image-classification": {
"impl": ImageClassificationPipeline,
"class": (NeuronModelForImageClassification,),
"default": "microsoft/beit-base-patch16-224-pt22k-ft22k",
"type": "image",
},
"image-segmentation": {
"impl": ImageSegmentationPipeline,
"class": (NeuronModelForSemanticSegmentation,),
"default": "apple/deeplabv3-mobilevit-small",
"type": "image",
},
"object-detection": {
"impl": ObjectDetectionPipeline,
"class": (NeuronModelForSemanticSegmentation,),
"default": "apple/deeplabv3-mobilevit-small",
"type": "image",
},
"automatic-speech-recognition": {
"impl": AutomaticSpeechRecognitionPipeline,
"class": (NeuronModelForCTC,),
"default": "facebook/wav2vec2-large-960h-lv60-self",
"type": "audio",
},
"audio-classification": {
"impl": AudioClassificationPipeline,
"class": (NeuronModelForAudioClassification,),
"default": "facebook/wav2vec2-large-960h-lv60-self",
"type": "audio",
},
}


def check_model_type(self, supported_models: Union[List[str], dict]):
    """
    No-op replacement for `Pipeline.check_model_type`, patched in below to avoid
    the error logs raised by
    https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/pipelines/base.py#L1091
    for model classes it does not recognize, such as the Neuron model classes.
    """
    pass


def load_pipeline(
model,
targeted_task,
tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]],
feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]],
image_processor: Optional[Union[str, BaseImageProcessor]],
load_tokenizer: bool,
load_feature_extractor: bool,
load_image_processor: bool,
    supported_tasks=NEURONX_SUPPORTED_TASKS,
    input_shapes: Optional[Dict[str, int]] = None,
    export: bool = False,
    subfolder: str = "",
    token: Optional[Union[bool, str]] = None,
    revision: str = "main",
    compiler_args: Optional[Dict[str, Any]] = None,
    hub_kwargs: Optional[Dict[str, Any]] = None,
    **kwargs,
):
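    """
    Resolve `model` (None, a model id string, or a `NeuronModel` instance) into a
    loaded Neuron model together with the preprocessors the pipeline needs.
    """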
    # Normalize optional dict arguments so each call gets fresh instances.
    input_shapes = input_shapes or {}
    compiler_args = compiler_args or {}
    hub_kwargs = hub_kwargs or {}

    # Load the default model for the task when none is provided.
    if model is None:
model_id = supported_tasks[targeted_task]["default"]
model = supported_tasks[targeted_task]["class"][0].from_pretrained(
model_id, export=True, **compiler_args, **input_shapes, **hub_kwargs, **kwargs
)
    # Load the model from a model id, optionally exporting it to Neuron format.
elif isinstance(model, str):
model_id = model
neuronx_model_class = supported_tasks[targeted_task]["class"][0]
# Try to determine the correct feature extraction class to use.
if targeted_task == "feature-extraction" and is_sentence_transformer_model(
model, token=token, revision=revision
):
logger.info("Using Sentence Transformers compatible Feature extraction pipeline")
neuronx_model_class = NeuronModelForSentenceTransformers
model = neuronx_model_class.from_pretrained(
model, export=export, **compiler_args, **input_shapes, **hub_kwargs, **kwargs
)
    # Use an already loaded Neuron model and recover its attached preprocessors.
elif isinstance(model, NeuronModel):
if tokenizer is None and load_tokenizer:
for preprocessor in model.preprocessors:
if isinstance(preprocessor, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
tokenizer = preprocessor
break
if tokenizer is None:
raise ValueError(
"Could not automatically find a tokenizer for the NeuronModel, you must pass a tokenizer explicitly"
)
if feature_extractor is None and load_feature_extractor:
for preprocessor in model.preprocessors:
if isinstance(preprocessor, SequenceFeatureExtractor):
feature_extractor = preprocessor
break
if feature_extractor is None:
raise ValueError(
"Could not automatically find a feature extractor for the NeuronModel, you must pass a "
"feature_extractor explictly"
)
if image_processor is None and load_image_processor:
for preprocessor in model.preprocessors:
if isinstance(preprocessor, BaseImageProcessor):
image_processor = preprocessor
break
if image_processor is None:
raise ValueError(
"Could not automatically find an image_processor for the NeuronModel, you must pass an image processor explicitly"
)
model_id = None
else:
        raise ValueError(
            f"Model {model} is not supported. Please provide a valid model, either as a string or a NeuronModel. "
            "You can also pass no model, in which case a default one will be used."
        )
return model, model_id, tokenizer, feature_extractor, image_processor


def pipeline(
    task: Optional[str] = None,
model: Optional[Union[str, NeuronModel]] = None,
tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None,
image_processor: Optional[Union[str, BaseImageProcessor]] = None,
use_fast: bool = True,
export: bool = False,
    input_shapes: Optional[Dict[str, int]] = None,
    compiler_args: Optional[Dict[str, Any]] = None,
token: Optional[Union[str, bool]] = None,
revision: Optional[str] = None,
trust_remote_code: Optional[bool] = None,
**kwargs,
) -> Pipeline:
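    """
    Build a [`transformers.Pipeline`] backed by a Neuron model, exporting the
    model to Neuron format on the fly when `export=True`.

    A minimal usage sketch (the checkpoint and shapes are illustrative):

    ```python
    >>> from optimum.neuron import pipeline

    >>> classifier = pipeline(
    ...     "text-classification",
    ...     model="distilbert-base-uncased-finetuned-sst-2-english",
    ...     export=True,
    ...     input_shapes={"batch_size": 1, "sequence_length": 128},
    ... )
    >>> classifier("Neuron pipelines are easy to use.")
    ```
    """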
if task not in NEURONX_SUPPORTED_TASKS:
raise ValueError(
f"Task {task} is not supported for the optimum neuron pipeline. Supported tasks are {list(NEURONX_SUPPORTED_TASKS.keys())}"
)
# copied from transformers.pipelines.__init__.py
commit_hash = kwargs.pop("_commit_hash", None)
hub_kwargs = {
"revision": revision,
"token": token,
"trust_remote_code": trust_remote_code,
"_commit_hash": commit_hash,
}
config = kwargs.get("config", None)
if config is None:
if isinstance(model, str):
config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **kwargs)
hub_kwargs["_commit_hash"] = config._commit_hash
elif isinstance(model, (PreTrainedModel, NeuronModel)):
if hasattr(model, "encoder"):
config = model.encoder.config
else:
config = model.config
neuron_config = getattr(config, "neuron", None)
if neuron_config is None:
if isinstance(model, str):
try:
neuron_config = NeuronConfig.from_pretrained(model, token=token, revision=revision)
except EnvironmentError:
# If the model is not a Neuron model, we will just ignore the error
pass
elif isinstance(model, NeuronModel):
neuron_config = getattr(model, "neuron_config", None)
if export:
if neuron_config is not None:
raise ValueError("This model has already been exported to Neuron format")
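        # Neuron compilation requires static input shapes; fall back to
        # conservative defaults when the caller does not provide any.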
if not input_shapes:
input_shapes = {"batch_size": 1, "sequence_length": 128}
            logger.warning(f"No input shapes provided, using default shapes: {input_shapes}")
else:
if neuron_config is None:
raise ValueError("The model must be exported to Neuron format first")
if input_shapes:
logger.warning("Input shapes can only be set during export")
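    # Determine, from each task's input modality, which preprocessors it does
    # not need: text tasks need a tokenizer, image tasks an image processor,
    # and audio tasks a feature extractor.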
no_feature_extractor_tasks = set()
no_tokenizer_tasks = set()
no_image_processor_tasks = set()
for _task, values in NEURONX_SUPPORTED_TASKS.items():
if values["type"] == "text":
no_feature_extractor_tasks.add(_task)
no_image_processor_tasks.add(_task)
elif values["type"] in {"image", "video"}:
no_tokenizer_tasks.add(_task)
no_feature_extractor_tasks.add(_task)
elif values["type"] in {"audio"}:
no_tokenizer_tasks.add(_task)
no_image_processor_tasks.add(_task)
elif values["type"] not in ["multimodal", "audio", "video"]:
raise ValueError(f"SUPPORTED_TASK {_task} contains invalid type {values['type']}")
# copied from transformers.pipelines.__init__.py l.609
if task in no_tokenizer_tasks:
        # These tasks will never require a tokenizer.
        # The model, on the other hand, might have a tokenizer, but
        # the files could be missing from the hub; instead of failing
        # on such repos, we just force the tokenizer not to load.
load_tokenizer = False
else:
load_tokenizer = True
if task in no_feature_extractor_tasks:
load_feature_extractor = False
else:
load_feature_extractor = True
if task in no_image_processor_tasks:
load_image_processor = False
else:
load_image_processor = True
model, model_id, tokenizer, feature_extractor, image_processor = load_pipeline(
model=model,
targeted_task=task,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
load_tokenizer=load_tokenizer,
load_feature_extractor=load_feature_extractor,
load_image_processor=load_image_processor,
export=export,
input_shapes=input_shapes,
compiler_args=compiler_args,
supported_tasks=NEURONX_SUPPORTED_TASKS,
hub_kwargs=hub_kwargs,
token=token,
)
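    # If the model did not ship with its preprocessors, fetch them from the Hub
    # using the resolved model id.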
if tokenizer is None and load_tokenizer:
tokenizer = get_preprocessor(model_id)
if feature_extractor is None and load_feature_extractor:
feature_extractor = get_preprocessor(model_id)
if image_processor is None and load_image_processor:
image_processor = get_preprocessor(model_id)
# If we don't specify a batch_size, the pipeline will assume batch_size 1
# and it will process the inputs one by one instead of processing them in parallel
batch_size = 1
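    # Read the Neuron config back from the (possibly freshly exported) model to
    # recover the static batch size it was compiled with.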
neuron_config = (
getattr(config, "neuron", None)
or getattr(model.config, "neuron", None)
or getattr(model, "neuron_config", None)
)
if isinstance(neuron_config, NeuronConfig):
batch_size = neuron_config.batch_size
    elif isinstance(neuron_config, dict):
        # Older exports may store the compiled batch size under either key.
        for attr in ["batch_size", "static_batch_size"]:
            batch_size = neuron_config.get(attr, batch_size)
if batch_size > 1 and tokenizer is not None and tokenizer.pad_token_id is None:
# The pipeline needs a pad token to be able to batch
if isinstance(model.config.eos_token_id, list):
tokenizer.pad_token_id = model.config.eos_token_id[0]
else:
tokenizer.pad_token_id = model.config.eos_token_id
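    # Replace the pipeline's model type check with a no-op: it only knows about
    # transformers model classes and would log spurious errors for Neuron models.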
if hasattr(NEURONX_SUPPORTED_TASKS[task]["impl"], "check_model_type"):
NEURONX_SUPPORTED_TASKS[task]["impl"].check_model_type = check_model_type
return transformers_pipeline(
task,
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
image_processor=image_processor,
use_fast=use_fast,
batch_size=batch_size,
pipeline_class=NEURONX_SUPPORTED_TASKS[task]["impl"],
device=model.device,
**kwargs,
)