# packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/python/requests/0.hf-inference.py
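# Streams a chat completion from a vision-language model through the Hugging
# Face Inference router: the request sends one text prompt plus one image URL,
# and the generated tokens are printed as they arrive.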
import os
import json
import requests
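
# OpenAI-compatible chat completions endpoint on the Hugging Face Inference
# router; authentication uses the HF_TOKEN environment variable.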
API_URL = "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions"
headers = {
"Authorization": f"Bearer {os.environ['HF_TOKEN']}",
}
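
# The response is a server-sent events (SSE) stream: each event line looks
# like 'data: {...}' and the stream terminates with 'data: [DONE]'.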
def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload, stream=True)
    for line in response.iter_lines():
        # Skip keep-alive blank lines and anything that is not a data event
        if not line.startswith(b"data:"):
            continue
        if line.strip() == b"data: [DONE]":
            return
        # Drop the "data:" prefix and parse the JSON chunk; str.removeprefix
        # avoids the lstrip("data:") pitfall of stripping a character set
        # rather than a prefix (and the original rstrip("/n") typo for "\n")
        yield json.loads(line.decode("utf-8").removeprefix("data:").strip())
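
# The payload follows the OpenAI-style chat completions schema: a single user
# message whose content mixes a text part and an image_url part.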
chunks = query({
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe this image in one sentence."
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                    }
                }
            ]
        }
    ],
    "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "stream": True,
})
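
# Each chunk carries an incremental delta; concatenating the deltas yields the
# full response.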
for chunk in chunks:
    # Some chunks (e.g. a final one carrying only a finish_reason) may omit
    # "content", so use .get() rather than indexing
    print(chunk["choices"][0]["delta"].get("content", ""), end="")
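
# Usage sketch (hf_xxx is a placeholder for a token with inference permissions):
#   HF_TOKEN=hf_xxx python 0.hf-inference.py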