in tensorflow/inference/docker/build_artifacts/sagemaker/python_service.py [0:0]
def _handle_load_model_post(self, res, data): # noqa: C901
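    """Load a model into this multi-model (MME) container.

    `data` is the parsed request payload and must carry "model_name" and
    "url" (the model's base path), e.g.
    {"model_name": "model_1", "url": "/opt/ml/models/model_1/model"}
    (illustrative values). One TensorFlow Serving instance is started per
    configured worker; 409 is returned if the model is already loaded and
    507 if no ports are left to host it.
    """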
with lock():
model_name = data["model_name"]
base_path = data["url"]
        # sync local MME instance status and refresh the available port pools
self._sync_local_mme_instance_status()
self._update_ports_available()
self._sync_model_handlers()
        # the model is already loaded; reject the duplicate request with 409 Conflict
if model_name in self._mme_tfs_instances_status:
res.status = falcon.HTTP_409
res.body = json.dumps({"error": "Model {} is already loaded.".format(model_name)})
return
is_load_successful = True
response = {}
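        # start one TFS instance per configured worker; each instance gets its
        # own REST/gRPC port pair popped from the available-port pools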
for i in range(self._tfs_instance_count):
# check if there are available ports
if not self._ports_available():
is_load_successful = False
response["status"] = falcon.HTTP_507
response["body"] = json.dumps(
{"error": "Memory exhausted: no available ports to load the model."}
)
break
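            # reserve a REST/gRPC port pair for this TFS instance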
tfs_rest_port = self._tfs_available_ports["rest_port"].pop()
tfs_grpc_port = self._tfs_available_ports["grpc_port"].pop()
response = self._load_model(model_name, base_path, tfs_rest_port, tfs_grpc_port, i)
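            # a returned pid means a TFS process was spawned; track it so the
            # cleanup path below can account for it even if the load fails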
if "pid" in response:
self._mme_tfs_instances_status.setdefault(model_name, []).append(
TfsInstanceStatus(tfs_rest_port, tfs_grpc_port, response["pid"])
)
if response["status"] != falcon.HTTP_200:
log.info(f"Failed to load model : {model_name}")
is_load_successful = False
break
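        # on failure, roll back any partially started instances and drop the
        # model config; otherwise publish the updated instance status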
if not is_load_successful:
log.info(f"Failed to load model : {model_name}, Starting to cleanup...")
self._delete_model(model_name)
self._remove_model_config(model_name)
else:
self._upload_mme_instance_status()
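        # propagate the last TFS / error response to the caller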
res.status = response["status"]
res.body = response["body"]
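
For reference, a minimal sketch of how a client might drive this handler, assuming the container exposes the standard SageMaker MME POST /models route on local port 8080 (route, port, and paths below are assumptions, not taken from this file):

import json
import requests  # assumed to be available in the client environment

# hypothetical load request; model name and base path are illustrative
payload = {
    "model_name": "model_1",
    "url": "/opt/ml/models/model_1/model",
}
resp = requests.post(
    "http://localhost:8080/models",
    data=json.dumps(payload),
    headers={"Content-Type": "application/json"},
)
# 200: loaded, 409: already loaded, 507: no free TFS ports
print(resp.status_code, resp.text)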