in model-gallery/deploy/llm/vLLM/python_client.py [0:0]
def main():
    """Send one user chat message and print the model's reply.

    Relies on ``client`` and ``model`` being defined in the enclosing
    (module) scope; ``client`` is presumably an OpenAI-compatible client
    pointed at the deployed endpoint -- confirm against the rest of the file.

    With ``stream`` enabled the reply is printed fragment by fragment as
    chunks arrive; otherwise the complete message is printed in one go.
    """
    stream = True
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "你好,介绍一下你自己,越详细越好。",
                    }
                ],
            }
        ],
        model=model,
        max_completion_tokens=2048,
        stream=stream,
    )
    if stream:
        for chunk in chat_completion:
            # Some chunks carry no choices at all (e.g. usage-only chunks);
            # indexing choices[0] on those would raise IndexError.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            # Role-only and finish chunks have no content; printing them
            # unguarded would emit the literal text "None" into the reply.
            if piece:
                # Flush so each fragment is visible as soon as it arrives
                # instead of waiting for the stdout buffer to fill.
                print(piece, end="", flush=True)
        # Terminate the streamed text with a newline, matching the
        # non-streaming branch below.
        print()
    else:
        result = chat_completion.choices[0].message.content
        print(result)