notebooks/search/_nbtest.setup.ipynb (100 lines of code) (raw):
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "e180af3a-3a2c-4186-a577-7051ec6460b1",
"metadata": {},
"outputs": [],
"source": [
"!pip install -qU \"elasticsearch<9\" sentence-transformers==2.7.0"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "63d22ea2-ecca-41bb-b08f-de8ad49cda41",
"metadata": {},
"outputs": [],
"source": [
"# Build the Elasticsearch client that the following setup cell relies on.\n",
"from getpass import getpass\n",
"\n",
"from elasticsearch import Elasticsearch\n",
"\n",
"# Credentials are prompted for rather than hardcoded in the notebook.\n",
"ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
"\n",
"client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b367acaa-90e6-43d0-b9ae-cf42a0e2c0f1",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from urllib.request import urlopen\n",
"from sentence_transformers import SentenceTransformer\n",
"\n",
"# NBTEST is not defined in this notebook; presumably the nbtest runner injects it\n",
"# and NBTEST[\"notebook\"] names the notebook under test -- TODO confirm.\n",
"if NBTEST[\"notebook\"] in [\n",
"    \"01-keyword-querying-filtering.ipynb\",\n",
"    \"02-hybrid-search.ipynb\",\n",
"    \"06-synonyms-api.ipynb\",\n",
"]:\n",
"    # these tests need book_index to exist ahead of time\n",
"    # drop any previous copy first so the index is rebuilt from scratch\n",
"    client.indices.delete(index=\"book_index\", ignore_unavailable=True)\n",
"\n",
"    # only the vector field is mapped explicitly here\n",
"    # NOTE(review): dims presumably matches the embedding size of the model\n",
"    # loaded below -- confirm if the model is ever changed.\n",
"    mappings = {\n",
"        \"properties\": {\n",
"            \"title_vector\": {\n",
"                \"type\": \"dense_vector\",\n",
"                \"dims\": 384,\n",
"                \"index\": \"true\",\n",
"                \"similarity\": \"cosine\",\n",
"            }\n",
"        }\n",
"    }\n",
"    client.indices.create(index=\"book_index\", mappings=mappings)\n",
"\n",
"    url = \"https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/notebooks/search/data.json\"\n",
"    # use a context manager so the HTTP response is closed once the payload is read\n",
"    with urlopen(url) as response:\n",
"        books = json.loads(response.read())\n",
"\n",
"    model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n",
"    # bulk payload alternates an action line with the document it applies to\n",
"    operations = []\n",
"    for book in books:\n",
"        operations.append({\"index\": {\"_index\": \"book_index\"}})\n",
"        # Transforming the title into an embedding using the model\n",
"        book[\"title_vector\"] = model.encode(book[\"title\"]).tolist()\n",
"        operations.append(book)\n",
"    bulk_response = client.bulk(index=\"book_index\", operations=operations, refresh=True)\n",
"    # the bulk API reports per-item failures in the response body instead of raising,\n",
"    # so fail loudly here rather than letting the tests run against a partial index\n",
"    if bulk_response[\"errors\"]:\n",
"        raise RuntimeError(\"Bulk indexing into book_index reported errors\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}