def _load_model()

in tensorflow/inference/docker/build_artifacts/sagemaker/python_service.py [0:0]


    def _load_model(self, model_name, base_path, rest_port, grpc_port, model_index):
        if self.validate_model_dir(base_path):
            try:
                self._import_custom_modules(model_name)
                tfs_config = tfs_utils.create_tfs_config_individual_model(model_name, base_path)
                tfs_config_file = "/sagemaker/tfs-config/{}/{}/model-config.cfg".format(
                    model_name, model_index
                )
                log.info("tensorflow serving model config: \n%s\n", tfs_config)
                os.makedirs(os.path.dirname(tfs_config_file))
                with open(tfs_config_file, "w", encoding="utf8") as f:
                    f.write(tfs_config)

                batching_config_file = "/sagemaker/batching/{}/{}/batching-config.cfg".format(
                    model_name, model_index
                )
                if self._tfs_enable_batching:
                    tfs_utils.create_batching_config(batching_config_file)

                cmd = tfs_utils.tfs_command(
                    grpc_port,
                    rest_port,
                    tfs_config_file,
                    self._tfs_enable_batching,
                    batching_config_file,
                    tfs_intra_op_parallelism=self._tfs_intra_op_parallelism,
                    tfs_inter_op_parallelism=self._tfs_inter_op_parallelism,
                )
                log.info("MME starts tensorflow serving with command: {}".format(cmd))
                p = subprocess.Popen(cmd.split())

                tfs_utils.wait_for_model(rest_port, model_name, self._tfs_wait_time_seconds, p.pid)

                log.info("started tensorflow serving (pid: %d)", p.pid)

                return {
                    "status": falcon.HTTP_200,
                    "body": json.dumps(
                        {
                            "success": "Successfully loaded model {}, "
                            "listening on rest port {} "
                            "and grpc port {}.".format(model_name, rest_port, grpc_port)
                        },
                    ),
                    "pid": p.pid,
                }
            except MultiModelException as multi_model_exception:
                if multi_model_exception.code == 409:
                    return {
                        "status": falcon.HTTP_409,
                        "body": multi_model_exception.msg,
                        "pid": multi_model_exception.pid,
                    }
                elif multi_model_exception.code == 408:
                    cpu_memory_usage = tfs_utils.get_cpu_memory_util()
                    log.info(f"cpu memory usage {cpu_memory_usage}")
                    if cpu_memory_usage > 70:
                        return {
                            "status": falcon.HTTP_507,
                            "body": "Memory exhausted: not enough memory to start TFS instance",
                            "pid": multi_model_exception.pid,
                        }
                    return {
                        "status": falcon.HTTP_408,
                        "body": multi_model_exception.msg,
                        "pid": multi_model_exception.pid,
                    }
                else:
                    return {
                        "status": falcon.HTTP_500,
                        "body": multi_model_exception.msg,
                        "pid": multi_model_exception.pid,
                    }
            except FileExistsError as e:
                return {
                    "status": falcon.HTTP_409,
                    "body": json.dumps(
                        {"error": "Model {} is already loaded. {}".format(model_name, str(e))}
                    ),
                }
            except OSError as os_error:
                log.error(f"failed to load model with exception {os_error}")
                if os_error.errno == 12:
                    return {
                        "status": falcon.HTTP_507,
                        "body": "Memory exhausted: not enough memory to start TFS instance",
                    }
                else:
                    return {
                        "status": falcon.HTTP_500,
                        "body": os_error.strerror,
                    }
        else:
            return {
                "status": falcon.HTTP_404,
                "body": json.dumps(
                    {
                        "error": "Could not find valid base path {} for servable {}".format(
                            base_path, model_name
                        )
                    }
                ),
            }