in tensorflow/inference/docker/build_artifacts/sagemaker_neuron/serve.py [0:0]
def __init__(self):
    self._state = "initializing"
    self._nginx = None
    self._tfs = []
    self._gunicorn = None
    self._gunicorn_command = None
    self._enable_python_service = False
    self._tfs_version = os.environ.get("SAGEMAKER_TFS_VERSION", "1.13")
    self._nginx_http_port = os.environ.get("SAGEMAKER_BIND_TO_PORT", "8080")
    self._nginx_loglevel = os.environ.get("SAGEMAKER_TFS_NGINX_LOGLEVEL", "error")
    self._tfs_default_model_name = os.environ.get("SAGEMAKER_TFS_DEFAULT_MODEL_NAME", "None")
    self._sagemaker_port_range = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE", None)
    self._gunicorn_workers = os.environ.get("SAGEMAKER_GUNICORN_WORKERS", None)
    self._gunicorn_threads = os.environ.get("SAGEMAKER_GUNICORN_THREADS", 1)
    self._tfs_config_path = "/sagemaker/model-config.cfg"
    self._tfs_batching_config_path = "/sagemaker/batching-config.cfg"

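    # NEURONCORE_GROUP_SIZES controls how the Neuron runtime groups NeuronCores;
    # if the caller did not set it, default to "1" (one core per group) and
    # remember the effective value.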
    self._user_ncgs = os.environ.get("NEURONCORE_GROUP_SIZES", None)
    if self._user_ncgs is None:
        os.environ["NEURONCORE_GROUP_SIZES"] = "1"
        self._user_ncgs = 1

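    # Size the gunicorn worker pool: an explicit SAGEMAKER_GUNICORN_WORKERS wins,
    # then the NEURON_CORE_HOST_TOTAL core count, and finally a single worker.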
    if self._gunicorn_workers is None:
        num_host_cores = os.environ.get("NEURON_CORE_HOST_TOTAL")
        if num_host_cores is None:
            self._gunicorn_workers = 1
        else:
            self._gunicorn_workers = num_host_cores

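    # Feature flags: TFS request batching and multi-model endpoints are opt-in
    # and must be exactly "true" or "false".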
    _enable_batching = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower()
    _enable_multi_model_endpoint = os.environ.get("SAGEMAKER_MULTI_MODEL", "false").lower()
    if _enable_multi_model_endpoint not in ["true", "false"]:
        raise ValueError("SAGEMAKER_MULTI_MODEL must be 'true' or 'false'")
    self._tfs_enable_multi_model_endpoint = _enable_multi_model_endpoint == "true"

    self._need_python_service()
    log.info("PYTHON SERVICE: {}".format(str(self._enable_python_service)))

    if _enable_batching not in ["true", "false"]:
        raise ValueError("SAGEMAKER_TFS_ENABLE_BATCHING must be 'true' or 'false'")
    self._tfs_enable_batching = _enable_batching == "true"

    self._use_gunicorn = self._enable_python_service or self._tfs_enable_multi_model_endpoint

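    # SAGEMAKER_SAFE_PORT_RANGE is a "low-high" string of ports reserved for the
    # container; the first two become the TFS gRPC and REST ports. The low + 2
    # check requires at least three ports, presumably leaving headroom for
    # additional TFS processes.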
    if self._sagemaker_port_range is not None:
        parts = self._sagemaker_port_range.split("-")
        low = int(parts[0])
        hi = int(parts[1])
        if low + 2 > hi:
            raise ValueError("not enough ports available in SAGEMAKER_SAFE_PORT_RANGE ({})"
                             .format(self._sagemaker_port_range))
        self._tfs_grpc_port = str(low)
        self._tfs_rest_port = str(low + 1)
    else:
        # just use the standard default ports
        self._tfs_grpc_port = "9000"
        self._tfs_rest_port = "8501"

    # set environment variables for the python service
    os.environ["TFS_GRPC_PORT"] = self._tfs_grpc_port
    os.environ["TFS_REST_PORT"] = self._tfs_rest_port
