packages/tasks-gen/snippets-fixtures/conversational-vlm-stream/python/huggingface_hub/0.hf-inference.py (29 lines of code) (raw):
import os
from huggingface_hub import InferenceClient
client = InferenceClient(
provider="hf-inference",
api_key=os.environ["HF_TOKEN"],
)
stream = client.chat.completions.create(
model="meta-llama/Llama-3.2-11B-Vision-Instruct",
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": "Describe this image in one sentence."
},
{
"type": "image_url",
"image_url": {
"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
}
}
]
}
],
stream=True,
)
for chunk in stream:
print(chunk.choices[0].delta.content, end="")