in tensorflow/inference/docker/build_artifacts/sagemaker/python_service.py [0:0]
def _load_model(self, model_name, base_path, rest_port, grpc_port, model_index):
    """Start a dedicated TensorFlow Serving process for one model and return a
    status/body/pid dict describing the outcome."""
    if self.validate_model_dir(base_path):
        try:
            self._import_custom_modules(model_name)
            tfs_config = tfs_utils.create_tfs_config_individual_model(model_name, base_path)
            tfs_config_file = "/sagemaker/tfs-config/{}/{}/model-config.cfg".format(
                model_name, model_index
            )
            log.info("tensorflow serving model config: \n%s\n", tfs_config)
            # Raises FileExistsError (handled below) if this model was already loaded.
            os.makedirs(os.path.dirname(tfs_config_file))
            with open(tfs_config_file, "w", encoding="utf8") as f:
                f.write(tfs_config)
            batching_config_file = "/sagemaker/batching/{}/{}/batching-config.cfg".format(
                model_name, model_index
            )
            if self._tfs_enable_batching:
                tfs_utils.create_batching_config(batching_config_file)
            cmd = tfs_utils.tfs_command(
                grpc_port,
                rest_port,
                tfs_config_file,
                self._tfs_enable_batching,
                batching_config_file,
                tfs_intra_op_parallelism=self._tfs_intra_op_parallelism,
                tfs_inter_op_parallelism=self._tfs_inter_op_parallelism,
            )
            log.info("MME starts tensorflow serving with command: {}".format(cmd))
            p = subprocess.Popen(cmd.split())
            # Block until the model is reported ready or the wait times out.
            tfs_utils.wait_for_model(rest_port, model_name, self._tfs_wait_time_seconds, p.pid)
            log.info("started tensorflow serving (pid: %d)", p.pid)
            return {
                "status": falcon.HTTP_200,
                "body": json.dumps(
                    {
                        "success": "Successfully loaded model {}, "
                        "listening on rest port {} "
                        "and grpc port {}.".format(model_name, rest_port, grpc_port)
                    },
                ),
                "pid": p.pid,
            }
        except MultiModelException as multi_model_exception:
            if multi_model_exception.code == 409:
                return {
                    "status": falcon.HTTP_409,
                    "body": multi_model_exception.msg,
                    "pid": multi_model_exception.pid,
                }
            elif multi_model_exception.code == 408:
                cpu_memory_usage = tfs_utils.get_cpu_memory_util()
                log.info(f"cpu memory usage {cpu_memory_usage}")
                # Treat a load timeout under high memory utilization as memory exhaustion.
                if cpu_memory_usage > 70:
                    return {
                        "status": falcon.HTTP_507,
                        "body": "Memory exhausted: not enough memory to start TFS instance",
                        "pid": multi_model_exception.pid,
                    }
                return {
                    "status": falcon.HTTP_408,
                    "body": multi_model_exception.msg,
                    "pid": multi_model_exception.pid,
                }
            else:
                return {
                    "status": falcon.HTTP_500,
                    "body": multi_model_exception.msg,
                    "pid": multi_model_exception.pid,
                }
        except FileExistsError as e:
            return {
                "status": falcon.HTTP_409,
                "body": json.dumps(
                    {"error": "Model {} is already loaded. {}".format(model_name, str(e))}
                ),
            }
        except OSError as os_error:
            log.error(f"failed to load model with exception {os_error}")
            if os_error.errno == 12:  # errno 12: ENOMEM (cannot allocate memory)
                return {
                    "status": falcon.HTTP_507,
                    "body": "Memory exhausted: not enough memory to start TFS instance",
                }
            else:
                return {
                    "status": falcon.HTTP_500,
                    "body": os_error.strerror,
                }
    else:
        return {
            "status": falcon.HTTP_404,
            "body": json.dumps(
                {
                    "error": "Could not find valid base path {} for servable {}".format(
                        base_path, model_name
                    )
                }
            ),
        }
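

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of python_service.py): tfs_utils.tfs_command
# is defined elsewhere in the container. The helper below only approximates
# the kind of tensorflow_model_server invocation it assembles, using standard
# TF Serving flags; the real implementation may differ.
def _sketch_tfs_command(
    grpc_port,
    rest_port,
    config_file,
    enable_batching=False,
    batching_config_file=None,
    intra_op_parallelism=None,
    inter_op_parallelism=None,
):
    cmd = "tensorflow_model_server --port={} --rest_api_port={} --model_config_file={}".format(
        grpc_port, rest_port, config_file
    )
    if enable_batching:
        cmd += " --enable_batching=true --batching_parameters_file={}".format(
            batching_config_file
        )
    if intra_op_parallelism:
        cmd += " --tensorflow_intra_op_parallelism={}".format(intra_op_parallelism)
    if inter_op_parallelism:
        cmd += " --tensorflow_inter_op_parallelism={}".format(inter_op_parallelism)
    return cmd


# Example: roughly what subprocess.Popen(cmd.split()) receives for one model.
# _sketch_tfs_command(9000, 8501, "/sagemaker/tfs-config/mymodel/0/model-config.cfg")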