notebooks/playground-examples/openai-elasticsearch-client.ipynb (175 lines of code) (raw):

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Playground: RAG with OpenAI & Elasticsearch Python Client\n",
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elastic/elasticsearch-labs/blob/main/notebooks/playground/openai-elasticsearch-client.ipynb)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "zQlYpYkI46Ff",
    "outputId": "83677846-8a6a-4b49-fde0-16d473778814"
   },
   "outputs": [],
   "source": [
    "%pip install -qU elasticsearch openai"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "GCZR7-zK810e"
   },
   "source": [
    "## Example Integration code\n",
    "To adapt this example for your use-case:\n",
    "- Update your connection details in the `es_client`\n",
    "- Replace the `es_query` with the query suggested in Playground\n",
    "- Replace the `index_source_fields` with the fields you want to use for context, per index\n",
    "- Update the prompt to reflect the Playground prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "DofNZ2w25nIr"
   },
   "outputs": [],
   "source": [
    "from elasticsearch import Elasticsearch\n",
    "from openai import OpenAI\n",
    "from getpass import getpass\n",
    "\n",
    "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n",
    "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
    "\n",
    "# Never hardcode credentials in a notebook; prompt for the OpenAI key as well.\n",
    "OPENAI_API_KEY = getpass(\"OpenAI API Key: \")\n",
    "\n",
    "# Update the Elasticsearch URL based on your own setup\n",
    "es_client = Elasticsearch(\"http://localhost:9200\", api_key=ELASTIC_API_KEY)\n",
    "\n",
    "openai_client = OpenAI(\n",
    "    api_key=OPENAI_API_KEY,\n",
    ")\n",
    "\n",
    "# Update the source fields based on your context field options\n",
    "# This example will be for bm25-index, we use the text field for context\n",
    "index_source_fields = {\"bm25-index\": [\"text\"]}\n",
    "\n",
    "\n",
    "def get_elasticsearch_results(query):\n",
    "    \"\"\"Run the retriever query against Elasticsearch and return the top hits.\"\"\"\n",
    "    # Update the query to match your retriever provided in Playground\n",
    "    es_query = {\n",
    "        \"retriever\": {\n",
    "            \"standard\": {\"query\": {\"multi_match\": {\"query\": query, \"fields\": [\"text\"]}}}\n",
    "        },\n",
    "        \"size\": 3,\n",
    "    }\n",
    "\n",
    "    result = es_client.search(index=\"bm25-index\", body=es_query)\n",
    "    return result[\"hits\"][\"hits\"]\n",
    "\n",
    "\n",
    "def create_openai_prompt(question, results):\n",
    "    \"\"\"Build a RAG prompt from the retrieved hits and the user question.\"\"\"\n",
    "    context = \"\"\n",
    "    for hit in results:\n",
    "        inner_hit_path = f\"{hit['_index']}.{index_source_fields.get(hit['_index'])[0]}\"\n",
    "\n",
    "        ## For semantic_text matches, we need to extract the text from the inner_hits\n",
    "        if \"inner_hits\" in hit and inner_hit_path in hit[\"inner_hits\"]:\n",
    "            context += \"\\n --- \\n\".join(\n",
    "                inner_hit[\"_source\"][\"text\"]\n",
    "                for inner_hit in hit[\"inner_hits\"][inner_hit_path][\"hits\"][\"hits\"]\n",
    "            )\n",
    "        else:\n",
    "            source_field = index_source_fields.get(hit[\"_index\"])[0]\n",
    "            hit_context = hit[\"_source\"][source_field]\n",
    "            context += f\"{hit_context}\\n\"\n",
    "\n",
    "    # Update the prompt based on your own requirements\n",
    "    prompt = f\"\"\"\n",
    "    Instructions:\n",
    "\n",
    "    - You are an assistant for question-answering tasks.\n",
    "    - Answer questions truthfully and factually using only the information presented.\n",
    "    - If you don't know the answer, just say that you don't know, don't make up an answer!\n",
    "    - You must always cite the document where the answer was extracted using inline academic citation style [], using the position.\n",
    "    - Use markdown format for code examples.\n",
    "    - You are correct, factual, precise, and reliable.\n",
    "\n",
    "    Context:\n",
    "    {context}\n",
    "\n",
    "    Question: {question}\n",
    "    Answer:\n",
    "    \"\"\"\n",
    "\n",
    "    return prompt\n",
    "\n",
    "\n",
    "def generate_openai_completion(user_prompt):\n",
    "    \"\"\"Send the assembled prompt to OpenAI and return the completion text.\"\"\"\n",
    "    response = openai_client.chat.completions.create(\n",
    "        model=\"gpt-4o\",\n",
    "        messages=[\n",
    "            {\"role\": \"user\", \"content\": user_prompt},\n",
    "        ],\n",
    "    )\n",
    "\n",
    "    return response.choices[0].message.content\n",
    "\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    question = \"what is this?\"\n",
    "    elasticsearch_results = get_elasticsearch_results(question)\n",
    "    context_prompt = create_openai_prompt(question, elasticsearch_results)\n",
    "    openai_completion = generate_openai_completion(context_prompt)\n",
    "    print(openai_completion)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "include_colab_link": true,
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}