in arctic_inference/dynasor/vllm_server.py [0:0]
def main():
    """Launch a vLLM server and the Dynasor OpenAI-compatible proxy in front of it.

    The port given on the command line is the one clients talk to: the proxy
    listens there, while the vLLM server itself is moved to the next port up
    and used as the proxy's ``--target-base-url``.

    Each child runs in its own process. The vLLM server is started first and
    must pass ``check_health`` before the proxy is started; the proxy must then
    pass ``check_health`` as well. If either check fails, every child process
    that has been started is terminated and the launcher exits with status 1.
    """
    register_signal_handlers()

    parser = FlexibleArgumentParser(
        description="Arctic Inference Dynasor Proxy w/ vLLM Server"
    )
    parser = make_arg_parser(parser)
    args = parser.parse_args()

    # The user-facing port goes to the proxy; vLLM is shifted to port + 1.
    openai_port = args.port
    vllm_port = openai_port + 1
    args.port = vllm_port
    validate_parsed_serve_args(args)

    # Start the server in a separate process
    server_process = multiprocessing.Process(target=run_vllm_server, args=(args,))

    openai_config = openai_server.parse_args([
        "--port", str(openai_port),
        "--target-base-url", f"http://localhost:{vllm_port}",
    ])
    print(f"Starting OpenAI server: {openai_config}")
    proxy_server = multiprocessing.Process(
        target=run_openai_server,
        args=(openai_config,),
    )

    server_process.start()

    # Wait for the server to be healthy
    if check_health(vllm_port):
        print(f"Server is healthy and running on port {args.port}")
    else:
        print("Server failed to become healthy within the timeout period")
        server_process.terminate()
        server_process.join()
        sys.exit(1)

    proxy_server.start()

    # Wait for the OpenAI server to be healthy
    if check_health(openai_port):
        print(f"Dynasor Proxy server is healthy and running on port {openai_port}")
    else:
        print("Dynasor Proxy server failed to become healthy within the timeout period")
        # The vLLM server is already up at this point. It is a non-daemonic
        # child, so leaving it alive would make interpreter shutdown block on
        # it and keep the server running after a failed launch. Stop both.
        for child in (proxy_server, server_process):
            child.terminate()
        for child in (proxy_server, server_process):
            child.join()
        sys.exit(1)

    # Keep the main process running and handle signals
    while server_process.is_alive() or proxy_server.is_alive():
        # Bounded joins so the loop wakes up periodically instead of blocking
        # indefinitely on a single child.
        server_process.join(timeout=3.0)
        proxy_server.join(timeout=3.0)