# vectordb-genai-101/chat-app-code/backend/services/inference_service.py
import logging
import os

from elasticsearch import Elasticsearch
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(module)s:%(lineno)d - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
# TODO: move this to a central location so this and search_service don't create separate connections (one option sketched below)
# Initialize the Elasticsearch client
es_client = Elasticsearch(
    hosts=os.getenv('ES_URL', 'http://kubernetes-vm:9200'),
    # api_key=os.getenv('ES_API_KEY'),
    basic_auth=(
        os.getenv('ES_USER', 'elastic'),
        os.getenv('ES_PASSWORD', 'changeme')
    ),
    request_timeout=90  # `timeout` was renamed to `request_timeout` in elasticsearch-py 8.x
)
logging.info(f"Elasticsearch client info: {es_client.info()}")
def es_chat_completion(prompt, inference_id):
    """Send `prompt` to the given completion inference endpoint and return the generated text."""
    logging.info(f"Starting Elasticsearch chat completion with Inference ID: {inference_id}")
    response = es_client.inference.inference(
        inference_id=inference_id,
        task_type="completion",
        input=prompt,
        timeout="90s"
    )
    logging.info(f"Response from Elasticsearch chat completion: {response}")
    return response['completion'][0]['result']
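
# Minimal usage sketch. The endpoint ID below is hypothetical: it must match a
# "completion" inference endpoint already created in Elasticsearch
# (PUT _inference/completion/<endpoint-id>).
if __name__ == "__main__":
    answer = es_chat_completion(
        prompt="In one sentence, what is a vector database?",
        inference_id="my-completion-endpoint",  # hypothetical endpoint ID
    )
    print(answer)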