# miscellaneous/distributed_tensorflow_mask_rcnn/container-serving/resources/serve.py
# This file implements the scoring service shell. You don't necessarily need to
# modify it for various algorithms. It starts nginx and gunicorn with the
# correct configurations and then simply waits until gunicorn exits.
#
# The flask server is specified to be the app object in wsgi.py.
#
# We set the following parameters:
#
# Parameter            Environment Variable    Default Value
# ---------            --------------------    -------------
# number of workers    MODEL_SERVER_WORKERS    1
# timeout              MODEL_SERVER_TIMEOUT    70 seconds
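#
# Both values can be overridden when the container is launched; for example
# (hypothetical values, assuming the image's entrypoint dispatches on "serve"):
#
#   docker run -e MODEL_SERVER_WORKERS=4 -e MODEL_SERVER_TIMEOUT=120 <image> serve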
from __future__ import print_function

import os
import signal
import subprocess
import sys

model_server_timeout = int(os.environ.get("MODEL_SERVER_TIMEOUT", 70))
model_server_workers = int(os.environ.get("MODEL_SERVER_WORKERS", 1))


def sigterm_handler(nginx_pid, gunicorn_pid):
    # Ask nginx to shut down gracefully (SIGQUIT) and gunicorn likewise
    # (SIGTERM). Either process may already have exited, so ignore OSError.
    try:
        os.kill(nginx_pid, signal.SIGQUIT)
    except OSError:
        pass
    try:
        os.kill(gunicorn_pid, signal.SIGTERM)
    except OSError:
        pass

    sys.exit(0)
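

# Note: "docker stop" delivers SIGTERM to the container's main process, which
# is typically how the handler above ends up being invoked in practice.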


def start_server():
    print("Starting the inference server with {} workers.".format(model_server_workers))

    # Link the nginx log files to stdout/stderr so that nginx output shows up
    # in the container logs.
    subprocess.check_call(["ln", "-sf", "/dev/stdout", "/var/log/nginx/access.log"])
    subprocess.check_call(["ln", "-sf", "/dev/stderr", "/var/log/nginx/error.log"])

    nginx = subprocess.Popen(["nginx", "-c", "/nginx.conf"])
    # Gunicorn serves the Flask app defined in wsgi.py over a unix socket;
    # nginx proxies incoming HTTP requests to that socket.
    gunicorn = subprocess.Popen(
        [
            "gunicorn",
            "--timeout",
            str(model_server_timeout),
            "-k",
            "gevent",
            "-b",
            "unix:/tmp/gunicorn.sock",
            "-w",
            str(model_server_workers),
            "wsgi:app",
        ]
    )
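
    # The /nginx.conf loaded above (shipped alongside this script) is expected
    # to proxy HTTP traffic to the gunicorn socket. A minimal sketch of such a
    # config (an illustration, not the actual file) would be:
    #
    #   upstream gunicorn {
    #       server unix:/tmp/gunicorn.sock;
    #   }
    #   server {
    #       listen 8080;
    #       location / {
    #           proxy_pass http://gunicorn;
    #       }
    #   }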

    signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid))

    # If either subprocess exits, so do we. os.wait() reaps any child process,
    # so loop until the reaped pid is one of ours.
    pids = {nginx.pid, gunicorn.pid}
    while True:
        pid, _ = os.wait()
        if pid in pids:
            break

    sigterm_handler(nginx.pid, gunicorn.pid)
    print("Inference server exiting")


# The main routine just invokes the start function.
if __name__ == "__main__":
    start_server()
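

# A SageMaker-style container typically runs this script as its "serve"
# entrypoint. A hypothetical local smoke test (assuming wsgi.py exposes the
# usual /ping health-check route) might look like:
#
#   docker run -p 8080:8080 <image> serve
#   curl http://localhost:8080/ping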