in docker/build_artifacts/sagemaker/python_service.py [0:0]
def __init__(self):
    """Initialize per-worker TFS state: ports, gRPC channels, and handlers.

    Behavior depends on the serving mode:
      * Multi-Model mode: port/pid bookkeeping starts empty and handlers are
        imported lazily when a model is loaded via _handle_load_model_post().
      * Single-Model mode: REST/gRPC port lists are parsed up front and a gRPC
        channel is opened for every port so each gunicorn worker holds the
        full port->channel mapping.
    """
    if SAGEMAKER_MULTI_MODEL_ENABLED:
        # Per-model bookkeeping is populated as models get loaded.
        self._model_tfs_rest_port = {}
        self._model_tfs_grpc_port = {}
        self._model_tfs_pid = {}
        self._tfs_ports = self._parse_sagemaker_port_range_mme(SAGEMAKER_TFS_PORT_RANGE)
        # Handlers are imported on demand inside _handle_load_model_post(),
        # so start with an empty registry here.
        self.model_handlers = {}
    else:
        self._channels = {}
        self._tfs_grpc_ports = self._parse_concat_ports(TFS_GRPC_PORTS)
        self._tfs_rest_ports = self._parse_concat_ports(TFS_REST_PORTS)
        # Open every gRPC channel now so this worker owns the complete
        # port -> channel mapping.
        for port in self._tfs_grpc_ports:
            self._setup_channel(port)

    # A single inference.py serves both Single-Model and Multi-Model modes;
    # fall back to the default handler when no script is present.
    if os.path.exists(INFERENCE_SCRIPT_PATH):
        self._handler, self._input_handler, self._output_handler = self._import_handlers()
        self._handlers = self._make_handler(
            self._handler, self._input_handler, self._output_handler
        )
    else:
        self._handlers = default_handler

    self._tfs_enable_batching = (SAGEMAKER_BATCHING_ENABLED == "true")
    self._tfs_default_model_name = os.environ.get("TFS_DEFAULT_MODEL_NAME", "None")
    self._tfs_wait_time_seconds = int(os.environ.get("SAGEMAKER_TFS_WAIT_TIME_SECONDS", 300))