notebooks/playground-examples/openai-elasticsearch-client.ipynb (175 lines of code) (raw):
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Playground: RAG with OpenAI & Elasticsearch Python Client\n",
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elastic/elasticsearch-labs/blob/main/notebooks/playground-examples/openai-elasticsearch-client.ipynb)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "zQlYpYkI46Ff",
"outputId": "83677846-8a6a-4b49-fde0-16d473778814"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"langchain-openai 0.1.4 requires langchain-core<0.2.0,>=0.1.46, but you have langchain-core 0.2.9 which is incompatible.\u001b[0m\u001b[31m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.1.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
]
}
],
"source": [
"%pip install -qU elasticsearch openai"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "GCZR7-zK810e"
},
"source": [
"## Example Integration code\n",
"To adapt this example for your use-case:\n",
"- Update your connection details in the `es_client`\n",
"- Replace the es_query with the query suggested in Playground\n",
"- Replace the index_source_fields with the fields you want to use for context, per index. \n",
"- Update the prompt to reflect the Playground prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "DofNZ2w25nIr"
},
"outputs": [],
"source": [
"from elasticsearch import Elasticsearch\n",
"from openai import OpenAI\n",
"from getpass import getpass\n",
"\n",
"# Both API keys are read interactively so that no secret is stored in the notebook.\n",
"# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n",
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
"OPENAI_API_KEY = getpass(\"OpenAI Api Key: \")\n",
"\n",
"# Update the Elasticsearch URL based on your own setup\n",
"es_client = Elasticsearch(\"http://localhost:9200\", api_key=ELASTIC_API_KEY)\n",
"\n",
"openai_client = OpenAI(api_key=OPENAI_API_KEY)\n",
"\n",
"# Update the source fields based on your context field options\n",
"# This example will be for bm25-index, we use the text field for context\n",
"index_source_fields = {\"bm25-index\": [\"text\"]}\n",
"\n",
"\n",
"def get_elasticsearch_results(query):\n",
"    \"\"\"Search the `bm25-index` index for `query`.\n",
"\n",
"    Returns the `hits.hits` entries of the search response (at most 3 are requested).\n",
"    \"\"\"\n",
"    # Update the retriever to match the one provided in Playground\n",
"    retriever = {\n",
"        \"standard\": {\n",
"            \"query\": {\n",
"                \"multi_match\": {\"query\": query, \"fields\": [\"text\"]},\n",
"            }\n",
"        }\n",
"    }\n",
"    search_body = {\"retriever\": retriever, \"size\": 3}\n",
"\n",
"    response = es_client.search(index=\"bm25-index\", body=search_body)\n",
"    matching_hits = response[\"hits\"][\"hits\"]\n",
"    return matching_hits\n",
"\n",
"\n",
"def create_openai_prompt(question, results):\n",
"    \"\"\"Build the prompt sent to OpenAI from the question and the retrieved hits.\n",
"\n",
"    Args:\n",
"        question: The user question, inserted verbatim into the prompt.\n",
"        results: Iterable of Elasticsearch hits; each hit is read through its\n",
"            `_index` and `_source` keys and, when present, its `inner_hits`.\n",
"\n",
"    Returns:\n",
"        The prompt string: instructions, the collected context, then the question.\n",
"    \"\"\"\n",
"    context_blocks = []\n",
"    for hit in results:\n",
"        # First configured context field for the index this hit came from\n",
"        source_field = index_source_fields.get(hit[\"_index\"])[0]\n",
"        inner_hit_path = f\"{hit['_index']}.{source_field}\"\n",
"\n",
"        ## For semantic_text matches, we need to extract the text from the inner_hits\n",
"        if \"inner_hits\" in hit and inner_hit_path in hit[\"inner_hits\"]:\n",
"            context_blocks.append(\n",
"                \"\\n --- \\n\".join(\n",
"                    inner_hit[\"_source\"][\"text\"]\n",
"                    for inner_hit in hit[\"inner_hits\"][inner_hit_path][\"hits\"][\"hits\"]\n",
"                )\n",
"            )\n",
"        else:\n",
"            context_blocks.append(hit[\"_source\"][source_field])\n",
"\n",
"    # Terminate every hit with a newline, whichever branch produced it, so the\n",
"    # text of one document never runs straight into the next one.\n",
"    context = \"\".join(f\"{block}\\n\" for block in context_blocks)\n",
"\n",
"    # Update the prompt based on your own requirements\n",
"    prompt = f\"\"\"\n",
" Instructions:\n",
" \n",
" - You are an assistant for question-answering tasks.\n",
" - Answer questions truthfully and factually using only the information presented.\n",
" - If you don't know the answer, just say that you don't know, don't make up an answer!\n",
" - You must always cite the document where the answer was extracted using inline academic citation style [], using the position.\n",
" - Use markdown format for code examples.\n",
" - You are correct, factual, precise, and reliable.\n",
" \n",
"\n",
" Context:\n",
" {context}\n",
"\n",
" Question: {question}\n",
" Answer:\n",
" \"\"\"\n",
"\n",
"    return prompt\n",
"\n",
"\n",
"def generate_openai_completion(user_prompt):\n",
"    \"\"\"Send `user_prompt` to the `gpt-4o` chat model as a single user message.\n",
"\n",
"    Returns the content of the first choice's message.\n",
"    \"\"\"\n",
"    chat_messages = [{\"role\": \"user\", \"content\": user_prompt}]\n",
"    completion = openai_client.chat.completions.create(\n",
"        model=\"gpt-4o\", messages=chat_messages\n",
"    )\n",
"\n",
"    first_choice = completion.choices[0]\n",
"    return first_choice.message.content\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
"    # Example question; replace with your own\n",
"    question = \"what is this?\"\n",
"\n",
"    # Retrieve matching documents, build the prompt from them, then ask the model\n",
"    elasticsearch_results = get_elasticsearch_results(question)\n",
"    context_prompt = create_openai_prompt(question, elasticsearch_results)\n",
"    openai_completion = generate_openai_completion(context_prompt)\n",
"\n",
"    print(openai_completion)"
]
}
],
"metadata": {
"colab": {
"include_colab_link": true,
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}