# start_vllm() — from ultravox/inference/run_vllm_inference.py


def start_vllm(args: InferenceArgs) -> subprocess.Popen:
    """Launch a vLLM OpenAI-compatible API server as a child process.

    The server is started in its own session (process group) so the whole
    process tree can later be terminated with a single signal to the group
    (e.g. ``os.killpg``).

    Args:
        args: Inference configuration; only ``args.model`` (the model to
            serve) is read here.

    Returns:
        The ``subprocess.Popen`` handle for the server process. The caller
        is responsible for waiting on / terminating it.
    """
    env = os.environ.copy()
    # Suppress vLLM's own logging configuration so it doesn't clobber ours.
    env["VLLM_CONFIGURE_LOGGING"] = "0"
    return subprocess.Popen(
        [
            "python",
            "-m",
            "vllm.entrypoints.openai.api_server",
            "--model",
            args.model,
            "--enable-chunked-prefill=False",
            "--max-model-len=8192",
            # Clients address the server by this fixed name regardless of
            # which underlying model checkpoint is loaded.
            "--served-model-name=fixie-ai/ultravox",
            "--tensor-parallel-size=8",
            "--uvicorn-log-level=warning",
        ],
        env=env,
        # Safe, documented replacement for preexec_fn=os.setsid: runs
        # setsid() in the child without the thread-safety hazards the
        # subprocess docs warn about for preexec_fn.
        start_new_session=True,
    )