in ultravox/inference/run_vllm_inference.py [0:0]
def start_vllm(args: InferenceArgs) -> subprocess.Popen:
    """Launch a vLLM OpenAI-compatible API server as a background subprocess.

    Args:
        args: Inference configuration; only ``args.model`` is read here (the
            model to serve).

    Returns:
        The ``subprocess.Popen`` handle for the server process. The caller is
        responsible for waiting on or terminating it.
    """
    # Local import: the module's top-of-file import block is outside this chunk.
    import sys

    env = os.environ.copy()
    # Silence vLLM's own logging configuration; only uvicorn warnings remain.
    env["VLLM_CONFIGURE_LOGGING"] = "0"
    return subprocess.Popen(
        [
            # Use the current interpreter rather than whatever "python" is on
            # PATH, so the server runs in the same environment/venv.
            sys.executable,
            "-m",
            "vllm.entrypoints.openai.api_server",
            "--model",
            args.model,
            "--enable-chunked-prefill=False",
            "--max-model-len=8192",
            "--served-model-name=fixie-ai/ultravox",
            "--tensor-parallel-size=8",
            "--uvicorn-log-level=warning",
        ],
        env=env,
        # Run the server in its own session (process group) so the whole group
        # can be signaled at shutdown. Equivalent to preexec_fn=os.setsid but
        # thread-safe per the subprocess documentation.
        start_new_session=True,
    )