packages/tasks-gen/snippets-fixtures/with-access-token/python/requests/0.hf-inference.py (18 lines of code) (raw):

import requests

# Chat-completions URL; the model id (meta-llama/Llama-3.1-8B-Instruct) is
# embedded in the path and repeated in the request payload below.
API_URL = "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-Instruct/v1/chat/completions"

# NOTE(review): "hf_xxx" looks like a placeholder access token — presumably it
# must be replaced with a real one before running; confirm.
headers = {
    "Authorization": "Bearer hf_xxx",
}


def query(payload, timeout=120):
    """POST *payload* as JSON to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable request body (sent via ``json=``).
        timeout: Seconds passed to ``requests.post`` as its ``timeout``, so a
            stalled connection cannot block the script indefinitely.

    Returns:
        The response body decoded with ``response.json()``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the request exceeds *timeout*.
    """
    response = requests.post(
        API_URL,
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of handing an error body back to the
    # caller, which would otherwise surface later as an opaque KeyError.
    response.raise_for_status()
    return response.json()


response = query({
    "messages": [
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ],
    "model": "meta-llama/Llama-3.1-8B-Instruct"
})

# Print the message object of the first returned choice.
print(response["choices"][0]["message"])