in container/sagemaker-tensorflow-inferentia/build_artifacts/sagemaker/serve.py [0:0]
def __init__(self):
    """Read SageMaker/TensorFlow-Serving configuration from environment
    variables and compute the port layout for the TFS instance(s).

    Raises:
        ValueError: if SAGEMAKER_TFS_ENABLE_BATCHING or SAGEMAKER_MULTI_MODEL
            is set to anything other than "true"/"false", or if
            SAGEMAKER_SAFE_PORT_RANGE does not contain enough ports for the
            requested number of TFS instances.
    """
    self._state = "initializing"
    self._nginx = None
    self._tfs = []
    self._gunicorn = None
    self._gunicorn_command = None
    self._enable_python_service = False
    self._tfs_version = os.environ.get("SAGEMAKER_TFS_VERSION", "1.13")
    self._nginx_http_port = os.environ.get("SAGEMAKER_BIND_TO_PORT", "8080")
    self._nginx_loglevel = os.environ.get("SAGEMAKER_TFS_NGINX_LOGLEVEL", "error")
    # NOTE(review): default is the literal string "None", not the None object —
    # downstream comparisons presumably expect a string; confirm before changing.
    self._tfs_default_model_name = os.environ.get("SAGEMAKER_TFS_DEFAULT_MODEL_NAME", "None")
    self._sagemaker_port_range = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE", None)
    # NOTE(review): defaults are ints but env values arrive as strings, so the
    # attribute type varies with whether the variable is set — confirm the
    # consumers only ever format these into a command line.
    self._gunicorn_workers = os.environ.get("SAGEMAKER_GUNICORN_WORKERS", 1)
    self._gunicorn_threads = os.environ.get("SAGEMAKER_GUNICORN_THREADS", 1)
    self._tfs_config_path = "/sagemaker/model-config.cfg"
    self._tfs_batching_config_path = "/sagemaker/batching-config.cfg"
    _enable_batching = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower()
    _enable_multi_model_endpoint = os.environ.get("SAGEMAKER_MULTI_MODEL", "false").lower()
    # Fraction of GPU memory reserved for initializing CUDA/cuDNN and other
    # GPU libraries.
    self._tfs_gpu_margin = float(os.environ.get("SAGEMAKER_TFS_FRACTIONAL_GPU_MEM_MARGIN", 0.2))
    self._tfs_instance_count = int(os.environ.get("SAGEMAKER_TFS_INSTANCE_COUNT", 1))
    self._tfs_wait_time_seconds = int(os.environ.get("SAGEMAKER_TFS_WAIT_TIME_SECONDS", 300))
    self._tfs_inter_op_parallelism = os.environ.get("SAGEMAKER_TFS_INTER_OP_PARALLELISM", 0)
    self._tfs_intra_op_parallelism = os.environ.get("SAGEMAKER_TFS_INTRA_OP_PARALLELISM", 0)
    self._gunicorn_worker_class = os.environ.get("SAGEMAKER_GUNICORN_WORKER_CLASS", "gevent")
    # Neuron relies on NEURON_RT_NUM_CORES to allocate cores for the model.
    # Customers can combine NEURON_RT_NUM_CORES and SAGEMAKER_TFS_INSTANCE_COUNT
    # to use all the cores, e.g. with 4 cores total:
    #   model uses 1 core: SAGEMAKER_TFS_INSTANCE_COUNT=4, NEURON_RT_NUM_CORES=1
    #   model uses 2 cores: SAGEMAKER_TFS_INSTANCE_COUNT=2, NEURON_RT_NUM_CORES=2
    # NEURONCORE_GROUP_SIZES is being deprecated; for backward compatibility
    # with customers still setting it, mirror it into NEURON_RT_NUM_CORES when
    # only the old variable is present.
    # (fix: was "!= None"; PEP 8 requires "is not None" for None comparisons)
    if (os.environ.get("NEURONCORE_GROUP_SIZES") is not None
            and os.environ.get("NEURON_RT_NUM_CORES") is None):
        os.environ["NEURON_RT_NUM_CORES"] = os.environ["NEURONCORE_GROUP_SIZES"]
    if os.environ.get("OMP_NUM_THREADS") is None:
        os.environ["OMP_NUM_THREADS"] = "1"
    if _enable_batching not in ["true", "false"]:
        raise ValueError("SAGEMAKER_TFS_ENABLE_BATCHING must be 'true' or 'false'")
    self._tfs_enable_batching = _enable_batching == "true"
    if _enable_multi_model_endpoint not in ["true", "false"]:
        raise ValueError("SAGEMAKER_MULTI_MODEL must be 'true' or 'false'")
    self._tfs_enable_multi_model_endpoint = _enable_multi_model_endpoint == "true"
    self._need_python_service()
    log.info("PYTHON SERVICE: {}".format(str(self._enable_python_service)))
    # gunicorn fronts requests whenever the python (pre/post-processing)
    # service or multi-model endpoint support is enabled.
    self._use_gunicorn = self._enable_python_service or self._tfs_enable_multi_model_endpoint
    if self._sagemaker_port_range is not None:
        parts = self._sagemaker_port_range.split("-")
        low = int(parts[0])
        hi = int(parts[1])
        self._tfs_grpc_ports = []
        self._tfs_rest_ports = []
        # NOTE(review): this requires one more port than the 2*count actually
        # consumed below (ports low .. low+2*count-1). Kept as-is: other
        # services in the container may also draw from this range — confirm
        # before tightening to "low + 2*count - 1 > hi".
        if low + 2 * self._tfs_instance_count > hi:
            raise ValueError("not enough ports available in SAGEMAKER_SAFE_PORT_RANGE ({})"
                             .format(self._sagemaker_port_range))
        # select non-overlapping grpc and rest ports based on tfs instance count
        for i in range(self._tfs_instance_count):
            self._tfs_grpc_ports.append(str(low + 2 * i))
            self._tfs_rest_ports.append(str(low + 2 * i + 1))
        # concat selected ports respectively in order to pass them to python service
        self._tfs_grpc_concat_ports = self._concat_ports(self._tfs_grpc_ports)
        self._tfs_rest_concat_ports = self._concat_ports(self._tfs_rest_ports)
    else:
        # just use the standard default ports
        self._tfs_grpc_ports = ["9000"]
        self._tfs_rest_ports = ["8501"]
        # provide single concat port here for the default case
        self._tfs_grpc_concat_ports = "9000"
        self._tfs_rest_concat_ports = "8501"
    # expose the chosen ports to the python service via the environment
    os.environ["TFS_GRPC_PORTS"] = self._tfs_grpc_concat_ports
    os.environ["TFS_REST_PORTS"] = self._tfs_rest_concat_ports