supporting-blog-content/using-crewai-with-elasticsearch/using_crewai_with_elasticsearch_notebook.ipynb (539 lines of code) (raw):

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "LlrEjmtJNpuX"
   },
   "source": [
    "# Using CrewAI with Elasticsearch\n",
    "\n",
    "This notebook demonstrates how to use CrewAI with Elasticsearch. This notebook is based on the article [Using CrewAI with Elasticsearch](https://www.elastic.co/search-labs/blog/using-crewai-with-elasticsearch)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "GNaAN-GNO5qp"
   },
   "source": [
    "## Installing dependencies and importing packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "qgclZayCk1Ct",
    "outputId": "f25349ea-412a-411c-9677-517f933fa5a4"
   },
   "outputs": [],
   "source": [
    "# It is suggested to run this script with Python 3.11\n",
    "%pip install elasticsearch==8.17 'crewai[tools]'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "rAesontNXpLu",
    "outputId": "a951f429-6aa0-48c0-e7d1-aebab0be9e6c"
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "\n",
    "from getpass import getpass\n",
    "from elasticsearch import Elasticsearch\n",
    "from elasticsearch.helpers import bulk\n",
    "\n",
    "from crewai import Agent, Crew, Task\n",
    "from crewai.tools import tool\n",
    "from crewai_tools import SerperDevTool, WebsiteSearchTool"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "NwOmnk99Pfh3"
   },
   "source": [
    "## Declaring variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 339
    },
    "id": "GVKJKfFpPWuj",
    "outputId": "693babf7-0cf0-413d-bfde-4353b3bff938"
   },
   "outputs": [],
   "source": [
    "# getpass() prompts without echoing, so the secrets never appear in the cell output.\n",
    "# ELASTIC_ENDPOINT and ELASTIC_API_KEY are read back when the client is created.\n",
    "# NOTE(review): SERPER_API_KEY and OPENAI_API_KEY are not referenced again in this\n",
    "# notebook; presumably the crewai tools / LLM client read them from the environment.\n",
    "os.environ[\"ELASTIC_ENDPOINT\"] = getpass(\"Elastic Endpoint: \")\n",
    "os.environ[\"ELASTIC_API_KEY\"] = getpass(\"Elastic Api Key: \")\n",
    "os.environ[\"SERPER_API_KEY\"] = getpass(\"Serper API Key: \")\n",
    "os.environ[\"OPENAI_API_KEY\"] = getpass(\"OpenAI API Key: \")\n",
    "\n",
    "# Names of the Elasticsearch resources this notebook creates, queries and deletes.\n",
    "INDEX_NAME = \"summer-clothes\"\n",
    "INFERENCE_ID = \"clothes-inference\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "3O2HclcYHEsS"
   },
   "source": [
    "## Instantiate an Elasticsearch client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "1LWiop8NYiQF"
   },
   "outputs": [],
   "source": [
    "# Elasticsearch client\n",
    "_client = Elasticsearch(\n",
    "    hosts=os.environ[\"ELASTIC_ENDPOINT\"],\n",
    "    api_key=os.environ[\"ELASTIC_API_KEY\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "9lvPHaXjPlfu"
   },
   "source": [
    "## Creating mappings and inference endpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "jInERNQajoNh",
    "outputId": "8b1fb48a-0781-4931-abb1-4d2e8340c647"
   },
   "outputs": [],
   "source": [
    "def _error_reason(error):\n",
    "    \"\"\"Return the root-cause reason carried by an Elasticsearch error, else str(error).\"\"\"\n",
    "    info = getattr(error, \"info\", None)\n",
    "    try:\n",
    "        return info[\"error\"][\"root_cause\"][0][\"reason\"]\n",
    "    except (TypeError, KeyError, IndexError):\n",
    "        # No `.info` (e.g. a connection failure) or a payload without a root cause:\n",
    "        # fall back to the exception text instead of failing inside the handler.\n",
    "        return str(error)\n",
    "\n",
    "\n",
    "try:\n",
    "    # options() applies the longer timeout and the retries to this request only.\n",
    "    _client.options(\n",
    "        request_timeout=60, max_retries=3, retry_on_timeout=True\n",
    "    ).inference.put(\n",
    "        task_type=\"sparse_embedding\",\n",
    "        inference_id=INFERENCE_ID,\n",
    "        body={\n",
    "            \"service\": \"elasticsearch\",\n",
    "            \"service_settings\": {\n",
    "                \"adaptive_allocations\": {\"enabled\": True},\n",
    "                \"num_threads\": 1,\n",
    "                \"model_id\": \".elser_model_2\",\n",
    "            },\n",
    "        },\n",
    "    )\n",
    "\n",
    "    print(\"Inference endpoint created successfully.\")\n",
    "\n",
    "except Exception as e:\n",
    "    print(f\"Error creating inference endpoint: {_error_reason(e)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "*NOTE: After creating the inference endpoint, it is highly recommended to wait 30 seconds for the model to be ready before sending requests.*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "tc88YzAYw31e",
    "outputId": "d2de37eb-e621-41c4-a1e0-6d14264d303d"
   },
   "outputs": [],
   "source": [
    "try:\n",
    "    _client.indices.create(\n",
    "        index=INDEX_NAME,\n",
    "        body={\n",
    "            \"mappings\": {\n",
    "                \"properties\": {\n",
    "                    # title and description are both copied into semantic_field,\n",
    "                    # which is bound to the inference endpoint created above.\n",
    "                    \"title\": {\"type\": \"text\", \"copy_to\": \"semantic_field\"},\n",
    "                    \"description\": {\"type\": \"text\", \"copy_to\": \"semantic_field\"},\n",
    "                    \"price\": {\"type\": \"float\"},\n",
    "                    \"semantic_field\": {\n",
    "                        \"type\": \"semantic_text\",\n",
    "                        \"inference_id\": INFERENCE_ID,\n",
    "                    },\n",
    "                }\n",
    "            }\n",
    "        },\n",
    "    )\n",
    "\n",
    "    print(\"Index created successfully.\")\n",
    "\n",
    "except Exception as e:\n",
    "    print(f\"Error creating index: {_error_reason(e)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "G1634zNrv1OS"
   },
   "source": [
    "## Indexing documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "LVr6TR8qlw2M"
   },
   "outputs": [],
   "source": [
    "documents = [\n",
    "    {\n",
    "        \"title\": \"Twist-Detail Crop Top\",\n",
    "        \"description\": \"Fitted crop top in woven, patterned fabric with linen content. Wide shoulder straps, sweetheart neckline, and gathered side seams for a gently draped effect. Twisted detail at center bust, cut-out section at front, and wide smocking at back. Lined\",\n",
    "        \"price\": 34.99,\n",
    "    },\n",
    "    {\n",
    "        \"title\": \"Rib-knit Tank Top\",\n",
    "        \"description\": \"Short, fitted top in a soft rib knit. Extra-narrow shoulder straps and a square neckline.\",\n",
    "        \"price\": 7.49,\n",
    "    },\n",
    "    {\n",
    "        \"title\": \"Linen-blend Shorts\",\n",
    "        \"description\": \"Shorts in an airy, woven linen blend. High, ruffle-trimmed waist, narrow drawstring and covered elastic at waistband, and discreet side pockets.\",\n",
    "        \"price\": 13.99,\n",
    "    },\n",
    "    {\n",
    "        \"title\": \"Twill Cargo Shorts\",\n",
    "        \"description\": \"Fitted shorts in cotton twill with a V-shaped yoke at front and back. High waist, zip fly with button, and patch front pockets.\",\n",
    "        \"price\": 20.99,\n",
    "    },\n",
    "    {\n",
    "        \"title\": \"Slim Fit Ribbed Tank Top\",\n",
    "        \"description\": \"Slim-fit tank top in medium weight, ribbed cotton-blend jersey with a fitted silhouette. Straight-cut hem.\",\n",
    "        \"price\": 8.49,\n",
    "    },\n",
    "    {\n",
    "        \"title\": \"Relaxed Fit Linen Resort Shirt\",\n",
    "        \"description\": \"Relaxed-fit shirt in airy linen. Resort collar, buttons without placket, yoke at back, and short sleeves. Straight-cut hem. Fabric made from linen is breathable, looks great when ironed or wrinkled, and softens over time.\",\n",
    "        \"price\": 17.99,\n",
    "    },\n",
    "    {\n",
    "        \"title\": \"Swim Shorts\",\n",
    "        \"description\": \"Swim shorts in woven fabric. Drawstring and covered elastic at waistband, side pockets, and a back pocket with hook-loop fastener. Small slit at sides. Mesh liner shorts.\",\n",
    "        \"price\": 14.99,\n",
    "    },\n",
    "    {\n",
    "        \"title\": \"Baggy Fit Cargo Shorts\",\n",
    "        \"description\": \"Baggy-fit cargo shorts in cotton canvas with a generous but not oversized silhouette. Zip fly with button, diagonal side pockets, back pockets with flap and snap fasteners, and bellows leg pockets with snap fasteners.\",\n",
    "        \"price\": 20.99,\n",
    "    },\n",
    "    {\n",
    "        \"title\": \"Muslin Shorts\",\n",
    "        \"description\": \"Shorts in airy cotton muslin. High, ruffle-trimmed waist, covered elastic at waistband, and an extra-narrow drawstring with a bead at ends. Discreet side pockets.\",\n",
    "        \"price\": 15.99,\n",
    "    },\n",
    "    {\n",
    "        \"title\": \"Oversized Lyocell-blend Dress\",\n",
    "        \"description\": \"Short, oversized dress in a woven lyocell blend. Gathered, low-cut V-neck with extra-narrow ties at front, 3/4-length, raglan-cut balloon sleeves with narrow elastic at cuffs, and seams at waist and hips with delicate piping. Unlined.\",\n",
    "        \"price\": 38.99,\n",
    "    },\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "N6cI3toLl3DP",
    "outputId": "e6de913a-0b4d-480e-a736-ac3afda2b568"
   },
   "outputs": [],
   "source": [
    "def build_data():\n",
    "    \"\"\"Yield one bulk index action per entry in `documents`.\"\"\"\n",
    "    for doc in documents:\n",
    "        yield {\"_index\": INDEX_NAME, \"_source\": doc}\n",
    "\n",
    "\n",
    "try:\n",
    "    # By default bulk() raises on document failures, which left the `errors`\n",
    "    # report below unreachable; raise_on_error=False returns them instead.\n",
    "    success, errors = bulk(_client, build_data(), raise_on_error=False)\n",
    "    print(f\"{success} documents indexed successfully\")\n",
    "    if errors:\n",
    "        print(\"Errors during indexing:\", errors)\n",
    "\n",
    "except Exception as e:\n",
    "    print(f\"Error: {e}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "SxI_L3Kev5Tk"
   },
   "source": [
    "## Creating CrewAI custom tool"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "eI_Mi-J3oFwr"
   },
   "outputs": [],
   "source": [
    "@tool(\"Elasticsearch custom tool\")\n",
    "def elasticsearch_tool(question: str) -> str:\n",
    "    \"\"\"\n",
    "    Search in Elasticsearch using semantic search capabilities.\n",
    "\n",
    "    Args:\n",
    "        question (str): The search query to be semantically matched\n",
    "\n",
    "    Returns:\n",
    "        str: Concatenated hits from Elasticsearch as string JSON\n",
    "    \"\"\"\n",
    "\n",
    "    # Hybrid retrieval: an RRF retriever combines a lexical match on `title`\n",
    "    # with a semantic query on `semantic_field`.\n",
    "    response = _client.search(\n",
    "        index=INDEX_NAME,\n",
    "        body={\n",
    "            \"size\": 10,\n",
    "            \"_source\": {\"includes\": [\"description\", \"title\", \"price\"]},\n",
    "            \"retriever\": {\n",
    "                \"rrf\": {\n",
    "                    \"retrievers\": [\n",
    "                        {\"standard\": {\"query\": {\"match\": {\"title\": question}}}},\n",
    "                        {\n",
    "                            \"standard\": {\n",
    "                                \"query\": {\n",
    "                                    \"semantic\": {\n",
    "                                        \"field\": \"semantic_field\",\n",
    "                                        \"query\": question,\n",
    "                                    }\n",
    "                                }\n",
    "                            }\n",
    "                        },\n",
    "                    ]\n",
    "                }\n",
    "            },\n",
    "        },\n",
    "    )\n",
    "\n",
    "    hits = response[\"hits\"][\"hits\"]\n",
    "\n",
    "    # No matches: an empty string is returned rather than an empty JSON list.\n",
    "    if not hits:\n",
    "        return \"\"\n",
    "\n",
    "    result = json.dumps([hit[\"_source\"] for hit in hits], indent=2)\n",
    "\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "9boWureEn12E"
   },
   "outputs": [],
   "source": [
    "search_tool = SerperDevTool()\n",
    "web_rag_tool = WebsiteSearchTool()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "X_0NBD1xv87q"
   },
   "source": [
    "## Setup Agents and Tasks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "KP8H1t-gqpDL"
   },
   "outputs": [],
   "source": [
    "es_retriever_agent = Agent(\n",
    "    role=\"Retriever\",\n",
    "    goal=\"Retrieve Elasticsearch documents\",\n",
    "    backstory=\"You are an expert researcher\",\n",
    "    tools=[elasticsearch_tool],\n",
    "    verbose=True,\n",
    ")\n",
    "\n",
    "\n",
    "internet_researcher_agent = Agent(\n",
    "    role=\"Research analyst\",\n",
    "    goal=\"Provide up-to-date market analysis of the industry\",\n",
    "    backstory=\"You are an expert analyst\",\n",
    "    tools=[search_tool, web_rag_tool],\n",
    "    verbose=True,\n",
    ")\n",
    "\n",
    "\n",
    "writer_agent = Agent(\n",
    "    role=\"Content Writer\",\n",
    "    goal=\"Craft engaging blog posts about the information gathered\",\n",
    "    backstory=\"A skilled writer with a passion for writing about fashion\",\n",
    "    tools=[],\n",
    "    verbose=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "5j88PRahqrwM"
   },
   "outputs": [],
   "source": [
    "es_retriever_task = Task(\n",
    "    description=\"Retrieve documents from the Elasticsearch index.\",\n",
    "    expected_output=\"A list of documents retrieved from the Elasticsearch index based on the query.\",\n",
    "    agent=es_retriever_agent,\n",
    ")\n",
    "\n",
    "\n",
    "internet_research_task = Task(\n",
    "    description=\"Research the latest trends in the summer clothes industry and provide a summary.\",\n",
    "    expected_output=\"A summary of the top 5 trending clothes for summer with a unique perspective on their significance.\",\n",
    "    agent=internet_researcher_agent,\n",
    ")\n",
    "\n",
    "write_task = Task(\n",
    "    description=\"Compare and contrast the information provided from the Retriever agent and the research agent, use cites. Be short\",\n",
    "    expected_output=\"Short report about Elasticsearch retriever and researcher.\",\n",
    "    agent=writer_agent,\n",
    "    output_file=\"blog-posts/new_post.md\",  # The final blog post will be saved here\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "bfOm8wdvwF7N"
   },
   "source": [
    "## Executing task"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "LXqw4o62qyFN",
    "outputId": "549a6001-beaf-47ad-d430-ef57813c6a6b"
   },
   "outputs": [],
   "source": [
    "# Use in a crew\n",
    "crew = Crew(\n",
    "    agents=[es_retriever_agent, internet_researcher_agent, writer_agent],\n",
    "    tasks=[\n",
    "        es_retriever_task,\n",
    "        internet_research_task,\n",
    "        write_task,\n",
    "    ],\n",
    ")\n",
    "\n",
    "# Execute tasks\n",
    "crew.kickoff()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "S6WZMJayyzxh"
   },
   "source": [
    "## Deleting\n",
    "\n",
    "Delete the index and the inference endpoint created by this notebook so they stop consuming cluster resources."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "9UcQwa41yy_x",
    "outputId": "8c8337e1-8685-4c33-ae51-ee61e323f0cd"
   },
   "outputs": [],
   "source": [
    "# options(ignore_status=...) replaces the deprecated per-request `ignore=` argument;\n",
    "# 400/404 responses are tolerated so re-running this cell does not raise.\n",
    "\n",
    "# Cleanup - Delete Index\n",
    "_client.options(ignore_status=[400, 404]).indices.delete(index=INDEX_NAME)\n",
    "\n",
    "# Cleanup - Inference endpoint\n",
    "_client.options(ignore_status=[400, 404]).inference.delete(inference_id=INFERENCE_ID)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "3.11.11",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}