pai/modelscope/model.py
# Copyright 2023 Alibaba, Inc. or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Dict, List, Optional, Union
from ..api.image import ImageLabel
from ..common.logging import get_logger
from ..common.utils import to_semantic_version
from ..model._model import (
DefaultServiceConfig,
ModelBase,
ResourceConfig,
StorageConfigBase,
container_serving_spec,
)
from ..serializers import SerializerBase
from ..session import Session, get_default_session
logger = get_logger(__name__)
class ModelScopeModel(ModelBase):
"""A ModelScope ``Model`` that can be deployed in PAI to create a prediction service.
A ModelScopeModel instance includes the model artifact path and information on how to create
prediction service in PAI. By calling the deploy() method, a prediction service is created in
PAI and a :class:`pai.predictor.Predictor` instance is returned that can be used to make
prediction to the service.
Example::
# Initialize a ModelScopeModel.
m: ModelScopeModel = ModelScopeModel(
model_data="oss://bucket-name/path/to/model",
source_dir="./serving/src/",
command="python serving.py",
modelscope_version="latest",
)
# Deploy the model to create an online prediction service.
p: Predictor = m.deploy(
service_name="ms_bert_serving",
instance_type="ecs.gn6i-c4g1.xlarge",
instance_count=1,
options={
"metadata.rpc.keepalive": 5000000,
"features.eas.aliyun.com/extra-ephemeral-storage":"40Gi",
},
)
# Make a prediction by sending data to the online prediction service.
result = p.predict("weather is good")
"""
def __init__(
self,
model_data: Optional[str] = None,
image_uri: Optional[str] = None,
modelscope_version: Optional[str] = None,
command: Optional[str] = None,
source_dir: Optional[str] = None,
git_config: Optional[Dict[str, str]] = None,
port: Optional[int] = None,
environment_variables: Optional[Dict[str, str]] = None,
requirements: Optional[List[str]] = None,
requirements_path: Optional[str] = None,
health_check: Optional[Dict[str, Any]] = None,
storage_configs: Optional[List[StorageConfigBase]] = None,
session: Optional[Session] = None,
):
"""Initialize a ModelScope Model.
Args:
model_data (str): An OSS URI or a local file path specifying the location of the
model. If ``model_data`` is a local file path, it will be uploaded to an OSS
bucket before deployment or model registration.
image_uri (str, optional): If specified, this image is used to create the online
prediction service instead of selecting an official PAI image based on
``modelscope_version``. Required if ``modelscope_version`` is ``None``; if both
are ``None``, a ``ValueError`` is raised.
modelscope_version (str, optional): The ModelScope version to use for executing
your model serving code. Defaults to ``None``. Required unless ``image_uri``
is provided.
command (str): The command used to launch the Model server.
source_dir (str, optional): A relative or absolute path to the source code
directory used to load the model and launch the model server. It will be
uploaded to an OSS bucket and mounted into the container. If there is a
``requirements.txt`` file under the directory, it will be installed before
the prediction server starts.
If ``git_config`` is provided, ``source_dir`` should be a relative path to
a directory in the Git repo. Given the following repo directory structure:

.. code::

    |----- README.md
    |----- src
             |----- train.py
             |----- test.py

to use the ``src`` directory as the source code directory, set
``source_dir='./src/'``.
git_config (Dict[str, str]): Git configuration used to clone the repo. It may
include ``repo``, ``branch``, ``commit``, ``username``, ``password`` and
``token``; only ``repo`` is required and specifies the Git repository. If
``branch`` is not provided, the default value 'master' is used. If ``commit``
is not provided, the latest commit on the specified branch is used.
``username``, ``password`` and ``token`` are used for authentication. For
example, the following config:

.. code:: python

    git_config = {
        'repo': 'https://github.com/modelscope/modelscope.git',
        'branch': 'master',
        'commit': '9bfc4a9d83c4beaf8378d0a186261ffc1cd9f960'
    }

results in cloning the repo specified in ``repo``, then checking out the
'master' branch and the specified commit.
port (int, optional): The port the server listens on inside the container;
prediction requests are forwarded to this port. The environment variable
``LISTENING_PORT`` in the container is set to this value. Defaults to 8000.
environment_variables (Dict[str, str], optional): Dictionary of environment
variable key-value pairs to set on the running container.
requirements (List[str], optional): A list of Python package dependencies to
install before the serving container runs.
requirements_path (str, optional): An absolute path to a requirements.txt file
inside the container.
health_check (Dict[str, Any], optional): The health check configuration. If not
set, a TCP readiness probe is used to check the health of the model server.
storage_configs (List[StorageConfigBase], optional): A list of storage configs
used to mount storage into the container, such as OSS, NFS, SharedMemory, or
NodeStorage.
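For example, a minimal sketch that mounts an OSS path into the container.
It assumes the SDK exposes an ``OssStorageConfig`` class under ``pai.model``
with ``mount_path`` and ``oss_uri`` parameters; check the installed SDK
version before using it:

.. code:: python

    from pai.model import OssStorageConfig

    storage_configs = [
        # Mount an OSS prefix to /ml/model_cache/ inside the container.
        OssStorageConfig(
            mount_path="/ml/model_cache/",
            oss_uri="oss://bucket-name/path/to/cache/",
        ),
    ]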
session (:class:`pai.session.Session`, optional): A PAI session object that
manages interactions with the PAI REST API.
.. tip::
You can find additional parameters for initializing this class at
:class:`~pai.model.ModelBase`.
"""
self._validate_args(image_uri=image_uri, modelscope_version=modelscope_version)
session = session or get_default_session()
self.model_data = model_data
self.image_uri = image_uri
self.modelscope_version = modelscope_version
self.command = command
self.source_dir = source_dir
self.git_config = git_config
self.port = port or DefaultServiceConfig.listen_port
self.environment_variables = environment_variables
self.requirements = requirements
self.requirements_path = requirements_path
self.health_check = health_check
self.storage_configs = storage_configs
super(ModelScopeModel, self).__init__(
model_data=self.model_data,
session=session,
)
# Resolve the serving image early to validate image_uri / modelscope_version.
self.serving_image_uri()
def _validate_args(self, image_uri: str, modelscope_version: str) -> None:
"""Check if image_uri or modelscope_version arguments are specified."""
if not image_uri and not modelscope_version:
raise ValueError(
"modelscope_version, and image_uri are both None. "
"Specify either modelscope_version or image_uri."
)
def serving_image_uri(self) -> str:
"""Return the Docker image to use for serving.
The :meth:`pai.modelscope.model.ModelScopeModel.deploy` method, which performs
the model deployment, calls this method to find the image to use for the
inference service.
Returns:
str: The URI of the Docker image.
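Example (illustrative only; the returned URI depends on the region of the
current session and the configured ``modelscope_version``):

.. code:: python

    m = ModelScopeModel(
        model_data="oss://bucket-name/path/to/model",
        command="python serving.py",
        modelscope_version="latest",
    )
    # Prints the URI of an official PAI ModelScope inference image.
    print(m.serving_image_uri())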
"""
if self.image_uri:
return self.image_uri
labels = [
ImageLabel.OFFICIAL_LABEL,
ImageLabel.EAS_LABEL,
ImageLabel.PROVIDER_PAI_LABEL,
ImageLabel.DEVICE_TYPE_GPU,
]
# TODO: filter by instance type (CPU/GPU)
# Filter images by ModelScope version
if self.modelscope_version == "latest":
latest_version = self._get_latest_ms_version_for_inference()
labels.append(ImageLabel.framework_version("ModelScope", latest_version))
else:
labels.append(
ImageLabel.framework_version("ModelScope", self.modelscope_version)
)
name = "modelscope-inference:"
list_images = self.session.image_api.list(
name=name,
labels=labels,
workspace_id=0,
verbose=True,
)
if list_images.total_count == 0:
raise ValueError(
"No official image found for modelscope version:"
f" {self.modelscope_version}. Currently supported versions are:"
f" {self._get_supported_ms_versions_for_inference()}"
)
return list_images.items[0]["ImageUri"]
def _get_supported_ms_versions_for_inference(self) -> List[str]:
"""Return the list of supported ModelScope versions for inference."""
labels = [
ImageLabel.OFFICIAL_LABEL,
ImageLabel.EAS_LABEL,
ImageLabel.PROVIDER_PAI_LABEL,
ImageLabel.DEVICE_TYPE_GPU,
ImageLabel.framework_version("ModelScope", "*"),
]
name = "modelscope-inference:"
list_images = self.session.image_api.list(
name=name,
labels=labels,
verbose=True,
workspace_id=0,
).items
res = []
for image in list_images:
for label in image["Labels"]:
if (
label["Key"] == "system.framework.ModelScope"
and label["Value"] not in res
):
res.append(label["Value"])
res.sort(key=lambda x: to_semantic_version(x))
return res
def _get_latest_ms_version_for_inference(self) -> str:
"""Return the latest ModelScope version for inference."""
res = self._get_supported_ms_versions_for_inference()
return max(
res,
key=lambda x: to_semantic_version(x),
)
def deploy(
self,
service_name: str,
instance_type: Optional[str] = None,
instance_count: Optional[int] = 1,
resource_config: Optional[Union[Dict[str, int], ResourceConfig]] = None,
resource_id: Optional[str] = None,
options: Optional[Dict[str, Any]] = None,
wait: bool = True,
serializer: Optional["SerializerBase"] = None,
**kwargs,
):
"""Deploy an online prediction service.
Args:
service_name (str): Name of the online prediction service. The name
must be unique within a region.
instance_type (str, optional): Type of the machine instance, for example,
'ecs.c6.large'. For all supported instance types, see the appendix of:
https://help.aliyun.com/document_detail/144261.htm?#section-mci-qh9-4j7
instance_count (int): Number of instances requested for the service
deployment (Default 1).
resource_config (Union[ResourceConfig, Dict[str, int]], optional):
Requested resources for each instance of the service. Required if
instance_type is not set. Example config:

.. code::

    resource_config = {
        "cpu": 2,          # The number of CPUs that each instance requires.
        "memory": 4000,    # The amount of memory that each instance requires,
                           # must be an integer, unit: MB.
        # "gpu": 1,        # The number of GPUs that each instance requires.
        # "gpu_memory": 3  # The amount of GPU memory that each instance
                           # requires, must be an integer, unit: GB.
    }
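Alternatively, a ``ResourceConfig`` object can be passed. A minimal sketch,
assuming its keyword arguments mirror the dict keys above (check the
installed SDK version):

.. code:: python

    from pai.model import ResourceConfig

    # Request 2 CPU cores and 4000 MB of memory per instance.
    resource_config = ResourceConfig(cpu=2, memory=4000)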
resource_id (str, optional): The ID of the resource group. The service
can be deployed to the ``public resource group`` or a
``dedicated resource group``.

* If ``resource_id`` is not specified, the service is deployed
  to the public resource group.
* To deploy the service to a dedicated resource group, set this
  parameter to the ID of the resource group, for example,
  "eas-r-6dbzve8ip0xnzte5rp".
options (Dict[str, Any], optional): Advanced deploy parameters used
to create the online prediction service.
wait (bool): Whether the call should wait until the online prediction
service is ready (Default True).
serializer (:class:`pai.serializers.SerializerBase`, optional): A
serializer object used to serialize the prediction request and
deserialize the prediction response.
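For example, a sketch that assumes the SDK provides a ``JsonSerializer``
under ``pai.serializers`` (check the installed version):

.. code:: python

    from pai.serializers import JsonSerializer

    # Send and receive JSON payloads for prediction requests/responses.
    p = m.deploy(
        service_name="ms_bert_serving",
        instance_type="ecs.gn6i-c4g1.xlarge",
        serializer=JsonSerializer(),
    )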
Returns:
:class:`pai.predictor.Predictor`: A PAI ``Predictor`` instance used for
making predictions against the prediction service.
"""
image_uri = self.serving_image_uri()
self.inference_spec = container_serving_spec(
command=self.command,
image_uri=image_uri,
port=self.port,
source_dir=self.source_dir,
git_config=self.git_config,
environment_variables=self.environment_variables,
requirements=self.requirements,
requirements_path=self.requirements_path,
health_check=self.health_check,
session=self.session,
storage_configs=self.storage_configs,
)
return super(ModelScopeModel, self).deploy(
service_name=service_name,
instance_type=instance_type,
instance_count=instance_count,
resource_config=resource_config,
resource_id=resource_id,
options=options,
wait=wait,
serializer=serializer,
**kwargs,
)