def start_model_server()

in src/sagemaker_huggingface_inference_toolkit/mms_model_server.py [0:0]


def start_model_server(handler_service=DEFAULT_HANDLER_SERVICE):
    """Configure and start the model server, then block until it exits.

    Depending on the environment, the model is prepared in one of three ways
    before the server is launched:

    - Multi-model mode (``ENABLE_MULTI_MODEL`` is truthy): the handler is
      published through the ``SAGEMAKER_HANDLER`` environment variable (unless
      it is already set) and the Python path is configured.
    - Hub mode (``HF_MODEL_ID`` is present in the environment): the model is
      loaded via ``_load_model_from_hub`` and the resulting directory is
      adapted to the MMS format.
    - Otherwise: the model directory is adapted to the MMS format directly.

    Args:
        handler_service (str): python path pointing to a module that defines
            a class with the following:

                - A ``handle`` method, which is invoked for all incoming inference
                    requests to the model server.
                - A ``initialize`` method, which is invoked at model server start up
                    for loading the model.

            Defaults to ``sagemaker_huggingface_inference_toolkit.handler_service``.

    Raises:
        ValueError: if ``HF_MODEL_ID`` is set while ``is_aws_neuron_available()``
            reports true.

    """
    if ENABLE_MULTI_MODEL:
        # Respect a handler that was already configured through the environment.
        if not os.getenv("SAGEMAKER_HANDLER"):
            os.environ["SAGEMAKER_HANDLER"] = handler_service
        _set_python_path()
    elif "HF_MODEL_ID" in os.environ:
        if is_aws_neuron_available():
            # The two literals are concatenated implicitly; the trailing space
            # on the first one keeps the sentences from running together.
            raise ValueError(
                "Hugging Face Hub deployments are currently not supported with AWS Neuron and Inferentia. "
                "You need to create a `inference.py` script to run your model using AWS Neuron"
            )
        storage_dir = _load_model_from_hub(
            model_id=os.environ["HF_MODEL_ID"],
            model_dir=DEFAULT_MMS_MODEL_DIRECTORY,
            revision=HF_MODEL_REVISION,
            use_auth_token=HF_API_TOKEN,
        )
        _adapt_to_mms_format(handler_service, storage_dir)
    else:
        # NOTE(review): `model_dir` is not bound inside this function; presumably
        # it is a module-level name -- confirm, otherwise this raises NameError.
        _adapt_to_mms_format(handler_service, model_dir)

    _create_model_server_config_file()

    # Extra requirements are installed only when a requirements file exists.
    if os.path.exists(REQUIREMENTS_PATH):
        _install_requirements()

    multi_model_server_cmd = [
        "multi-model-server",
        "--start",
        "--model-store",
        MODEL_STORE,
        "--mms-config",
        MMS_CONFIG_FILE,
        "--log-config",
        DEFAULT_MMS_LOG_FILE,
    ]

    logger.info(multi_model_server_cmd)
    # The Popen handle is intentionally discarded; the server process is looked
    # up separately right after launch.
    subprocess.Popen(multi_model_server_cmd)
    mms_process = _retrieve_mms_server_process()
    _add_sigterm_handler(mms_process)
    _add_sigchild_handler()

    # Block the caller for as long as the server process is alive.
    mms_process.wait()