in tensorflow/inference/docker/build_artifacts/sagemaker/python_service.py [0:0]
def __init__(self):
if SAGEMAKER_MULTI_MODEL_ENABLED:
            self._mme_tfs_instances_status: dict[str, list[TfsInstanceStatus]] = {}
self._tfs_ports = self._parse_sagemaker_port_range_mme(SAGEMAKER_TFS_PORT_RANGE)
self._tfs_available_ports = self._parse_sagemaker_port_range_mme(
SAGEMAKER_TFS_PORT_RANGE
)
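            # _tfs_ports holds the full configured range; _tfs_available_ports
            # presumably tracks the ports not yet leased to a loaded model
            # (ports are assumed to be removed from it as models are loaded).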
            # In Multi-Model mode, dependencies/handlers are imported lazily,
            # in _handle_load_model_post()
self.model_handlers = {}
else:
self._tfs_grpc_ports = self._parse_concat_ports(TFS_GRPC_PORTS)
self._tfs_rest_ports = self._parse_concat_ports(TFS_REST_PORTS)
self._channels = {}
for grpc_port in self._tfs_grpc_ports:
                # Initialize the gRPC channels here so that each gunicorn
                # worker holds a mapping from every gRPC port to its channel
self._setup_channel(grpc_port)
self._default_handlers_enabled = False
if os.path.exists(INFERENCE_SCRIPT_PATH):
                # Single-Model mode and Multi-Model mode both share one inference.py
self._handler, self._input_handler, self._output_handler = self._import_handlers()
self._handlers = self._make_handler(
self._handler, self._input_handler, self._output_handler
)
else:
self._handlers = default_handler
self._default_handlers_enabled = True
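        # _make_handler presumably chains the pieces imported from
        # inference.py as input_handler -> TFS request -> output_handler,
        # while default_handler is the fallback used when no inference.py
        # is provided.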
self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == "true"
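        # NOTE: the fallback below is the literal string "None", not the
        # Python None object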
self._tfs_default_model_name = os.environ.get("TFS_DEFAULT_MODEL_NAME", "None")
        self._tfs_inter_op_parallelism = int(
            os.environ.get("SAGEMAKER_TFS_INTER_OP_PARALLELISM", 0)
        )
        self._tfs_intra_op_parallelism = int(
            os.environ.get("SAGEMAKER_TFS_INTRA_OP_PARALLELISM", 0)
        )
self._tfs_instance_count = int(os.environ.get("SAGEMAKER_TFS_INSTANCE_COUNT", 1))
self._gunicorn_workers = int(os.environ.get("SAGEMAKER_GUNICORN_WORKERS", 1))
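        # The default wait time is divided across TFS instances, e.g. with
        # SAGEMAKER_TFS_INSTANCE_COUNT=2 the default is 55 // 2 == 27 seconds.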
self._tfs_wait_time_seconds = int(
os.environ.get("SAGEMAKER_TFS_WAIT_TIME_SECONDS", 55 // self._tfs_instance_count)
)
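
    # The helpers referenced in __init__ are defined elsewhere in this file.
    # The methods below are minimal sketches of the assumed behavior, under
    # hypothetical *_sketch names; they are illustrations, not the actual
    # implementations.
    def _parse_concat_ports_sketch(self, concat_ports):
        # Assumes TFS_GRPC_PORTS/TFS_REST_PORTS are comma-separated strings,
        # e.g. "9000,9001,9002" -> [9000, 9001, 9002].
        return [int(port) for port in concat_ports.split(",")]

    def _parse_sagemaker_port_range_mme_sketch(self, port_range):
        # Assumes SAGEMAKER_TFS_PORT_RANGE is an inclusive "lower-upper"
        # string, e.g. "9000-9999" -> [9000, 9001, ..., 9999].
        lower, upper = port_range.split("-")
        return list(range(int(lower), int(upper) + 1))

    def _setup_channel_sketch(self, grpc_port):
        # Assumes TFS serves gRPC on localhost; grpc.insecure_channel is the
        # standard grpcio call for creating an unencrypted channel.
        import grpc

        self._channels[grpc_port] = grpc.insecure_channel(f"localhost:{grpc_port}")

    def _import_handlers_sketch(self):
        # Assumes inference.py at INFERENCE_SCRIPT_PATH exposes some of
        # handler / input_handler / output_handler at module level, and
        # loads the module directly from its file path.
        import importlib.util

        spec = importlib.util.spec_from_file_location("inference", INFERENCE_SCRIPT_PATH)
        inference = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(inference)
        return (
            getattr(inference, "handler", None),
            getattr(inference, "input_handler", None),
            getattr(inference, "output_handler", None),
        )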