notebooks/search/_nbtest.setup.ipynb (100 lines of code) (raw):

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e180af3a-3a2c-4186-a577-7051ec6460b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# install the Elasticsearch client (major version below 9) and a pinned embedding library\n",
    "!pip install -qU \"elasticsearch<9\" sentence-transformers==2.7.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63d22ea2-ecca-41bb-b08f-de8ad49cda41",
   "metadata": {},
   "outputs": [],
   "source": [
    "# get the Elasticsearch client\n",
    "from elasticsearch import Elasticsearch\n",
    "from getpass import getpass\n",
    "\n",
    "# credentials are prompted for at run time so they are never stored in the notebook\n",
    "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
    "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
    "\n",
    "client = Elasticsearch(\n",
    "    cloud_id=ELASTIC_CLOUD_ID,\n",
    "    api_key=ELASTIC_API_KEY,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b367acaa-90e6-43d0-b9ae-cf42a0e2c0f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from urllib.request import urlopen\n",
    "from sentence_transformers import SentenceTransformer\n",
    "\n",
    "# NOTE(review): NBTEST is not defined in this notebook; presumably the test\n",
    "# harness injects it before these cells run - confirm against the runner.\n",
    "if NBTEST[\"notebook\"] in [\n",
    "    \"01-keyword-querying-filtering.ipynb\",\n",
    "    \"02-hybrid-search.ipynb\",\n",
    "    \"06-synonyms-api.ipynb\",\n",
    "]:\n",
    "    # these tests need book_index to exist ahead of time;\n",
    "    # drop any leftover copy first so the setup can be re-run safely\n",
    "    client.indices.delete(index=\"book_index\", ignore_unavailable=True)\n",
    "\n",
    "    # dims must match the length of the vectors produced by the model below\n",
    "    mappings = {\n",
    "        \"properties\": {\n",
    "            \"title_vector\": {\n",
    "                \"type\": \"dense_vector\",\n",
    "                \"dims\": 384,\n",
    "                \"index\": \"true\",\n",
    "                \"similarity\": \"cosine\",\n",
    "            }\n",
    "        }\n",
    "    }\n",
    "    client.indices.create(index=\"book_index\", mappings=mappings)\n",
    "\n",
    "    url = \"https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/notebooks/search/data.json\"\n",
    "    # close the HTTP response deterministically instead of leaking the connection\n",
    "    with urlopen(url) as response:\n",
    "        books = json.loads(response.read())\n",
    "\n",
    "    model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n",
    "    operations = []\n",
    "    for book in books:\n",
    "        operations.append({\"index\": {\"_index\": \"book_index\"}})\n",
    "        # Transforming the title into an embedding using the model\n",
    "        book[\"title_vector\"] = model.encode(book[\"title\"]).tolist()\n",
    "        operations.append(book)\n",
    "\n",
    "    # the bulk API reports per-item failures in the response body instead of\n",
    "    # raising, so check the flag to avoid a silently incomplete index\n",
    "    bulk_response = client.bulk(index=\"book_index\", operations=operations, refresh=True)\n",
    "    if bulk_response[\"errors\"]:\n",
    "        raise RuntimeError(\"bulk indexing into book_index reported errors\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}