def get_optimum_neuron_pipeline(task, model_dir)

in src/huggingface_inference_toolkit/optimum_utils.py


import logging
import os

logger = logging.getLogger(__name__)


def get_optimum_neuron_pipeline(task, model_dir):
    """Get an optimum-neuron pipeline for a given task.

    Checks that the task is supported by optimum-neuron and, when the model
    is not yet converted, that the required environment variables are set.
    Returns the optimum-neuron pipeline if all checks pass; raises an error
    otherwise.
    """
    logger.info("Getting optimum neuron pipeline.")
    from optimum.neuron.pipelines.transformers.base import (
        NEURONX_SUPPORTED_TASKS,
        pipeline,
    )
    from optimum.neuron.utils import NEURON_FILE_NAME

    # normalize os.PathLike / pathlib.Path inputs to a plain string
    if not isinstance(model_dir, str):
        model_dir = str(model_dir)

    # map sentence-embeddings to feature-extraction, since sentence embeddings are served through the feature-extraction pipeline
    if task == "sentence-embeddings":
        task = "feature-extraction"

    # check task support
    if task not in NEURONX_SUPPORTED_TASKS:
        raise ValueError(
            f"Task {task} is not supported by optimum neuron and inf2. Supported tasks are: {list(NEURONX_SUPPORTED_TASKS.keys())}"
        )

    # check whether the model is already converted, i.e. whether the Neuron
    # artifact (with its static input shapes) is present in the model directory
    export = NEURON_FILE_NAME not in os.listdir(model_dir)
    if export:
        logger.info(
            "Model is not converted. Checking if required environment variables are set and converting model."
        )
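    # NOTE (assumption, not shown in this excerpt): get_input_shapes(model_dir)
    # below is expected to resolve static shapes from the model config, falling
    # back to environment variables such as HF_OPTIMUM_BATCH_SIZE and
    # HF_OPTIMUM_SEQUENCE_LENGTH when the model is not yet converted.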

    # get static input shapes to run inference; get_input_shapes is defined elsewhere in this module
    input_shapes = get_input_shapes(model_dir)
    # setting NEURON_RT_NUM_CORES to 1 would avoid conflicts between multiple HTTP workers
    # TODO: ask the optimum team about the best options for running encoder models on 2 neuron cores
    # os.environ["NEURON_RT_NUM_CORES"] = "1"
    # get optimum neuron pipeline
    neuron_pipe = pipeline(
        task, model=model_dir, export=export, input_shapes=input_shapes
    )
    return neuron_pipe
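
For reference, a minimal usage sketch. The task name and model directory are illustrative placeholders, not values from the toolkit; the function decides on its own whether the model still needs compiling by checking for the Neuron artifact file.

# usage sketch; task and model_dir are placeholders
from huggingface_inference_toolkit.optimum_utils import get_optimum_neuron_pipeline

pipe = get_optimum_neuron_pipeline(
    task="text-classification",  # must be a key of NEURONX_SUPPORTED_TASKS
    model_dir="/opt/ml/model",   # directory holding the (possibly unconverted) model
)
print(pipe("Neuron inference keeps latency low."))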