in src/sagemaker/serve/builder/transformers_builder.py [0:0]
def _transformers_model_builder_deploy_wrapper(self, *args, **kwargs) -> Type[PredictorBase]:
    """Deploy the model and return a predictor matching the active mode.

    The mode stored on the builder can be overridden for this call by passing
    ``mode`` in ``kwargs``; only ``Mode.SAGEMAKER_ENDPOINT`` and
    ``Mode.LOCAL_CONTAINER`` are accepted as overrides.

    Args:
        *args: Positional arguments forwarded unchanged to
            ``self._original_deploy`` on the non-local path.
        **kwargs: Keyword arguments for the deployment. Keys handled here:

            * ``model_data_download_timeout``: when truthy, stored (as a
              string) in ``self.env_vars["MODEL_LOADING_TIMEOUT"]`` and used
              as the server timeout in local container mode.
            * ``mode``: optional mode override; removed before calling
              ``self._original_deploy``.
            * ``role``: assigned to ``self.pysdk_model.role`` and removed
              before calling ``self._original_deploy``.
            * ``endpoint_logging``: defaults to ``True`` when absent.
            * ``initial_instance_count``: defaults to ``1`` when absent.

            ``kwargs`` is also handed to ``self._set_instance`` before the
            remaining entries are forwarded to ``self._original_deploy``.

    Returns:
        TransformersLocalModePredictor: When the mode is ``Mode.LOCAL_CONTAINER``.
        InProcessModePredictor: When the mode is ``Mode.IN_PROCESS``.
        Otherwise, the predictor returned by ``self._original_deploy`` with
        its ``serializer`` and ``deserializer`` replaced by the ones taken
        from ``self.schema_builder``.

    Raises:
        ValueError: If ``mode`` is overridden with a value other than
            ``Mode.SAGEMAKER_ENDPOINT`` or ``Mode.LOCAL_CONTAINER``.
    """
    # Read once; kwargs is not modified before the local-container branch uses it.
    timeout = kwargs.get("model_data_download_timeout")
    if timeout:
        self.env_vars.update({"MODEL_LOADING_TIMEOUT": str(timeout)})

    if "mode" in kwargs and kwargs.get("mode") != self.mode:
        overwrite_mode = kwargs.get("mode")
        # mode overwritten by customer during model.deploy()
        logger.warning(
            "Deploying in %s Mode, overriding existing configurations set for %s mode",
            overwrite_mode,
            self.mode,
        )

        if overwrite_mode == Mode.SAGEMAKER_ENDPOINT:
            self.mode = self.pysdk_model.mode = Mode.SAGEMAKER_ENDPOINT
        elif overwrite_mode == Mode.LOCAL_CONTAINER:
            # Preparation runs while self.mode still holds the previous mode;
            # the mode is switched only afterwards.
            self._prepare_for_mode()
            self.mode = self.pysdk_model.mode = Mode.LOCAL_CONTAINER
        else:
            raise ValueError("Mode %s is not supported!" % overwrite_mode)

    serializer = self.schema_builder.input_serializer
    deserializer = self.schema_builder._output_deserializer

    if self.mode == Mode.LOCAL_CONTAINER:
        predictor = TransformersLocalModePredictor(
            self.modes[str(Mode.LOCAL_CONTAINER)], serializer, deserializer
        )
        self.modes[str(Mode.LOCAL_CONTAINER)].create_server(
            self.image_uri,
            # Fall back to the module default when no (truthy) timeout was given.
            timeout if timeout else DEFAULT_TIMEOUT,
            None,
            predictor,
            self.pysdk_model.env,
            jumpstart=False,
        )
        return predictor

    if self.mode == Mode.IN_PROCESS:
        predictor = InProcessModePredictor(
            self.modes[str(Mode.IN_PROCESS)], serializer, deserializer
        )
        self.modes[str(Mode.IN_PROCESS)].create_server(
            predictor,
        )
        return predictor

    # Remaining path: hand off to the original deploy implementation.
    self._set_instance(kwargs)

    # "mode" and "role" are consumed here and must not reach _original_deploy.
    kwargs.pop("mode", None)
    if "role" in kwargs:
        self.pysdk_model.role = kwargs.pop("role")

    if not _is_optimized(self.pysdk_model):
        env_vars = {}
        if str(Mode.LOCAL_CONTAINER) in self.modes:
            # upload model artifacts to S3 if LOCAL_CONTAINER -> SAGEMAKER_ENDPOINT
            self.pysdk_model.model_data, env_vars = self._prepare_for_mode(
                model_path=self.model_path, should_upload_artifacts=True
            )
        else:
            _, env_vars = self._prepare_for_mode()

        self.env_vars.update(env_vars)
        self.pysdk_model.env.update(self.env_vars)

    # Drop the secret-key entry from the model environment when its value is falsy.
    if (
        "SAGEMAKER_SERVE_SECRET_KEY" in self.pysdk_model.env
        and not self.pysdk_model.env["SAGEMAKER_SERVE_SECRET_KEY"]
    ):
        del self.pysdk_model.env["SAGEMAKER_SERVE_SECRET_KEY"]

    # Apply defaults only when the caller did not specify these options.
    kwargs.setdefault("endpoint_logging", True)
    kwargs.setdefault("initial_instance_count", 1)

    predictor = self._original_deploy(*args, **kwargs)
    predictor.serializer = serializer
    predictor.deserializer = deserializer
    return predictor