in src/sagemaker_huggingface_inference_toolkit/mms_model_server.py [0:0]
def start_model_server(handler_service=DEFAULT_HANDLER_SERVICE):
    """Configure and start the multi-model-server (MMS) process.

    Prepares the model artifacts for MMS depending on the deployment mode
    (SageMaker Multi-Model, Hugging Face Hub download, or a locally provided
    model), writes the MMS config file, installs extra requirements if a
    ``requirements.txt`` is present, launches ``multi-model-server``, wires
    signal handling, and blocks until the server process exits.

    Args:
        handler_service (str): python path pointing to a module that defines
            a class with the following:

            - A ``handle`` method, which is invoked for all incoming inference
              requests to the model server.
            - An ``initialize`` method, which is invoked at model server start up
              for loading the model.

            Defaults to ``sagemaker_huggingface_inference_toolkit.handler_service``.

    Raises:
        ValueError: if ``HF_MODEL_ID`` is set while running on AWS Neuron /
            Inferentia, which is not a supported combination.
    """
    if ENABLE_MULTI_MODEL:
        # SageMaker Multi-Model mode: MMS loads models on demand, so only
        # expose the handler via an env var and make the toolkit importable.
        if not os.getenv("SAGEMAKER_HANDLER"):
            os.environ["SAGEMAKER_HANDLER"] = handler_service
        _set_python_path()
    elif "HF_MODEL_ID" in os.environ:
        if is_aws_neuron_available():
            # BUGFIX: the two literals were previously concatenated without a
            # separating space ("...Inferentia.You need..."); also "a" -> "an".
            raise ValueError(
                "Hugging Face Hub deployments are currently not supported with AWS Neuron and Inferentia. "
                "You need to create an `inference.py` script to run your model using AWS Neuron"
            )
        # Download the model from the Hugging Face Hub into the default MMS
        # model directory, then repackage it into the layout MMS expects.
        storage_dir = _load_model_from_hub(
            model_id=os.environ["HF_MODEL_ID"],
            model_dir=DEFAULT_MMS_MODEL_DIRECTORY,
            revision=HF_MODEL_REVISION,
            use_auth_token=HF_API_TOKEN,
        )
        _adapt_to_mms_format(handler_service, storage_dir)
    else:
        # NOTE(review): `model_dir` is not defined in this function —
        # presumably a module-level name; confirm it resolves at runtime.
        _adapt_to_mms_format(handler_service, model_dir)

    _create_model_server_config_file()

    # Best-effort install of user-supplied extra dependencies, if any.
    if os.path.exists(REQUIREMENTS_PATH):
        _install_requirements()

    multi_model_server_cmd = [
        "multi-model-server",
        "--start",
        "--model-store",
        MODEL_STORE,
        "--mms-config",
        MMS_CONFIG_FILE,
        "--log-config",
        DEFAULT_MMS_LOG_FILE,
    ]

    logger.info(multi_model_server_cmd)
    subprocess.Popen(multi_model_server_cmd)

    # `multi-model-server --start` forks; locate the actual frontend process
    # so we can forward SIGTERM to it and keep the container alive while the
    # server runs.
    mms_process = _retrieve_mms_server_process()
    _add_sigterm_handler(mms_process)
    _add_sigchild_handler()
    mms_process.wait()