demo-python/code/sorting/sorting.ipynb (280 lines of code) (raw):
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Sorting Results\n",
"\n",
"The following notebook describes how to:\n",
"\n",
"1. Create a sample index consisting of fictitious hotel data\n",
"1. Sort the results by geographic distance\n",
"\n",
"## Prerequisites\n",
"* An Azure subscription, with access to Azure OpenAI.\n",
"* Azure AI Search, Basic tier or higher, for this workload.\n",
"* A deployment of the text-embedding-3-large model on Azure OpenAI.\n",
"\n",
"## Set up a Python virtual environment in Visual Studio Code\n",
"* Open the Command Palette (Ctrl+Shift+P).\n",
"* Search for **Python: Create Environment**.\n",
"* Select Venv.\n",
"* Select a Python interpreter. Choose 3.10 or later.\n",
"\n",
"It can take a minute to set up. If you run into problems, see [Python environments in VS Code](https://code.visualstudio.com/docs/python/environments)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Install packages"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use the %pip magic rather than `! pip` so the packages are installed into the\n",
"# environment of the running kernel, not whichever pip is first on the shell PATH.\n",
"%pip install -r requirements.txt --quiet"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Load .env file (Copy .env-sample to .env and update accordingly)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from azure.core.credentials import AzureKeyCredential\n",
"from azure.identity import DefaultAzureCredential\n",
"from dotenv import load_dotenv\n",
"\n",
"# Load settings from the .env file; override=True lets .env values replace variables\n",
"# that are already set in the process environment.\n",
"load_dotenv(override=True)\n",
"\n",
"# Variables not used here do not need to be updated in your .env file\n",
"endpoint = os.environ[\"AZURE_SEARCH_SERVICE_ENDPOINT\"]\n",
"\n",
"# The search admin key is optional: when it is unset or empty, keyless authentication\n",
"# through DefaultAzureCredential is used instead.\n",
"# To learn more, please visit https://learn.microsoft.com/azure/search/search-security-rbac\n",
"search_admin_key = os.getenv(\"AZURE_SEARCH_ADMIN_KEY\", \"\")\n",
"if search_admin_key:\n",
"    credential = AzureKeyCredential(search_admin_key)\n",
"else:\n",
"    credential = DefaultAzureCredential()\n",
"\n",
"index_name = os.environ[\"AZURE_SEARCH_INDEX\"]\n",
"aoai_endpoint = os.environ[\"AZURE_OPENAI_ENDPOINT\"]\n",
"\n",
"# Data is pre-vectorized using text-embedding-3-large\n",
"model_name = \"text-embedding-3-large\"\n",
"# The deployment name falls back to the model name when the variable is not set.\n",
"aoai_embedding_deployment = os.getenv(\"AZURE_OPENAI_DEPLOYMENT_NAME\", \"text-embedding-3-large\")\n",
"\n",
"# The Azure OpenAI key is optional as well (None when unset) if you are using keyless authentication\n",
"# To learn more, please visit https://learn.microsoft.com/azure/search/search-howto-managed-identities-data-sources and https://learn.microsoft.com/azure/developer/ai/keyless-connections\n",
"aoai_key = os.getenv(\"AZURE_OPENAI_KEY\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. Create search index"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Created sample index\n"
]
}
],
"source": [
"from azure.search.documents.indexes import SearchIndexClient\n",
"from azure.search.documents.indexes.models import (\n",
"    AzureOpenAIVectorizer,\n",
"    AzureOpenAIVectorizerParameters,\n",
"    ComplexField,\n",
"    HnswAlgorithmConfiguration,\n",
"    SearchField,\n",
"    SearchIndex,\n",
"    VectorSearch,\n",
"    VectorSearchProfile,\n",
")\n",
"\n",
"# Sub-fields of the complex \"Address\" field.\n",
"address_fields = [\n",
"    SearchField(name=\"StreetAddress\", type=\"Edm.String\", hidden=False, filterable=False, sortable=False, facetable=False, searchable=True),\n",
"    SearchField(name=\"City\", type=\"Edm.String\", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),\n",
"    SearchField(name=\"StateProvince\", type=\"Edm.String\", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),\n",
"    SearchField(name=\"PostalCode\", type=\"Edm.String\", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),\n",
"    SearchField(name=\"Country\", type=\"Edm.String\", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),\n",
"]\n",
"\n",
"# Sub-fields of each entry in the \"Rooms\" complex collection.\n",
"room_fields = [\n",
"    SearchField(name=\"Description\", type=\"Edm.String\", hidden=False, filterable=False, sortable=False, facetable=False, searchable=True),\n",
"    SearchField(name=\"Description_fr\", type=\"Edm.String\", hidden=False, filterable=False, sortable=False, facetable=False, searchable=True, analyzer_name=\"fr.microsoft\"),\n",
"    SearchField(name=\"Type\", type=\"Edm.String\", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),\n",
"    SearchField(name=\"BaseRate\", type=\"Edm.Double\", hidden=False, filterable=True, sortable=False, facetable=True, searchable=False),\n",
"    SearchField(name=\"BedOptions\", type=\"Edm.String\", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),\n",
"    SearchField(name=\"SleepsCount\", type=\"Edm.Int64\", hidden=False, filterable=True, sortable=False, facetable=True, searchable=False),\n",
"    SearchField(name=\"SmokingAllowed\", type=\"Edm.Boolean\", hidden=False, filterable=True, sortable=False, facetable=True, searchable=False),\n",
"    SearchField(name=\"Tags\", type=\"Collection(Edm.String)\", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),\n",
"]\n",
"\n",
"# Top-level hotel fields. \"Location\" is the sortable Edm.GeographyPoint field;\n",
"# both embedding fields use 3072 dimensions and the \"hnsw\" vector search profile.\n",
"hotel_fields = [\n",
"    SearchField(name=\"HotelId\", type=\"Edm.String\", key=True, hidden=False, filterable=True, sortable=False, facetable=False, searchable=True),\n",
"    SearchField(name=\"HotelName\", type=\"Edm.String\", hidden=False, filterable=False, sortable=False, facetable=False, searchable=True),\n",
"    SearchField(name=\"Description\", type=\"Edm.String\", hidden=False, filterable=False, sortable=False, facetable=False, searchable=True),\n",
"    SearchField(name=\"DescriptionEmbedding\", type=\"Collection(Edm.Single)\", hidden=False, searchable=True, vector_search_dimensions=3072, vector_search_profile_name=\"hnsw\"),\n",
"    SearchField(name=\"Description_fr\", type=\"Edm.String\", hidden=False, filterable=False, sortable=False, facetable=False, searchable=True, analyzer_name=\"fr.microsoft\"),\n",
"    SearchField(name=\"Description_fr_Embedding\", type=\"Collection(Edm.Single)\", hidden=False, searchable=True, vector_search_dimensions=3072, vector_search_profile_name=\"hnsw\"),\n",
"    SearchField(name=\"Category\", type=\"Edm.String\", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),\n",
"    SearchField(name=\"Tags\", type=\"Collection(Edm.String)\", hidden=False, filterable=True, sortable=False, facetable=True, searchable=True),\n",
"    SearchField(name=\"ParkingIncluded\", type=\"Edm.Boolean\", hidden=False, filterable=True, sortable=False, facetable=True, searchable=False),\n",
"    SearchField(name=\"LastRenovationDate\", type=\"Edm.DateTimeOffset\", hidden=False, filterable=False, sortable=True, facetable=False, searchable=False),\n",
"    SearchField(name=\"Rating\", type=\"Edm.Double\", hidden=False, filterable=True, sortable=True, facetable=True, searchable=False),\n",
"    ComplexField(name=\"Address\", collection=False, fields=address_fields),\n",
"    SearchField(name=\"Location\", type=\"Edm.GeographyPoint\", hidden=False, filterable=True, sortable=True, facetable=False, searchable=False),\n",
"    ComplexField(name=\"Rooms\", collection=True, fields=room_fields),\n",
"]\n",
"\n",
"# Vectorizer that points at the Azure OpenAI embedding deployment configured earlier.\n",
"embedding_vectorizer = AzureOpenAIVectorizer(\n",
"    vectorizer_name=\"openai\",\n",
"    parameters=AzureOpenAIVectorizerParameters(\n",
"        resource_url=aoai_endpoint,\n",
"        deployment_name=aoai_embedding_deployment,\n",
"        model_name=model_name,\n",
"        api_key=aoai_key,\n",
"    ),\n",
")\n",
"\n",
"# The \"hnsw\" profile ties the HNSW algorithm configuration to the \"openai\" vectorizer.\n",
"vector_search = VectorSearch(\n",
"    profiles=[VectorSearchProfile(name=\"hnsw\", vectorizer_name=\"openai\", algorithm_configuration_name=\"hnsw\")],\n",
"    algorithms=[HnswAlgorithmConfiguration(name=\"hnsw\")],\n",
"    vectorizers=[embedding_vectorizer],\n",
")\n",
"\n",
"index = SearchIndex(name=index_name, fields=hotel_fields, vector_search=vector_search)\n",
"\n",
"# create_or_update_index is used so the cell can be run again against the same index name.\n",
"index_client = SearchIndexClient(endpoint, credential)\n",
"result = index_client.create_or_update_index(index)\n",
"print(\"Created sample index\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 4. Upload sample documents\n",
"\n",
"Upload the sample hotel documents to the index.\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Uploaded sample documents\n"
]
}
],
"source": [
"from azure.search.documents import SearchClient\n",
"import json\n",
"\n",
"client = SearchClient(endpoint, index_name, credential)\n",
"\n",
"# Read the sample hotels first so the file is closed before any network call is made.\n",
"with open(\"../../../data/hotels.json\", encoding=\"utf-8\", mode=\"r\") as f:\n",
"    documents = json.load(f)\n",
"\n",
"# upload_documents returns one IndexingResult per document. Check each result so that\n",
"# success is only reported when every document was accepted by the index.\n",
"upload_results = client.upload_documents(documents)\n",
"failed_keys = [item.key for item in upload_results if not item.succeeded]\n",
"if failed_keys:\n",
"    raise RuntimeError(f\"Failed to upload documents with keys: {failed_keys}\")\n",
"print(\"Uploaded sample documents\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 5. Sort by Distance\n",
"\n",
"The following example query shows how to sort results by their distance from a geography point.\n",
"\n",
"For more information, see [OData `$orderby` syntax in Azure AI Search](https://learn.microsoft.com/en-us/azure/search/search-query-odata-orderby)."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HotelId: 1, HotelName: Stay-Kay City Hotel, Description: This classic hotel is fully-refurbished and ideally located on the main commercial artery of the city in the heart of New York. A few minutes away is Times Square and the historic centre of the city, as well as other places of interest that make New York one of America's most attractive and cosmopolitan cities.\n",
"Location: [-73.975403, 40.760586]\n",
"\n",
"HotelId: 15, HotelName: By the Market Hotel, Description: Book now and Save up to 30%. Central location. Walking distance from the Empire State Building & Times Square, in the Chelsea neighborhood. Brand new rooms. Impeccable service.\n",
"Location: [-73.989792, 40.756729]\n",
"\n",
"HotelId: 17, HotelName: City Skyline Antiquity Hotel, Description: In vogue since 1888, the Antiquity Hotel takes you back to bygone era. From the crystal chandeliers that adorn the Green Room, to the arched ceilings of the Grand Hall, the elegance of old New York beckons. Elevate Your Experience. Upgrade to a premiere city skyline view for less, where old world charm combines with dramatic views of the city, local cathedral and midtown.\n",
"Location: [-74.003571, 40.738651]\n",
"\n",
"HotelId: 34, HotelName: Lakefront Captain Inn, Description: Every stay starts with a warm cookie. Amenities like the Counting Sheep sleep experience, our Wake-up glorious breakfast buffet and spacious workout facilities await.\n",
"Location: [-72.761261, 41.725285]\n",
"\n",
"HotelId: 5, HotelName: Red Tide Hotel, Description: On entering this charming hotel in Scarlet Harbor, you'll notice an uncommon blend of antiques, original artwork, and contemporary comforts that give this hotel its signature look. Each suite is furnished to accentuate the views and unique characteristics of the building's classic architecture. No two suites are alike. However, all guests are welcome in the mezzanine plaza, the surrounding gardens, and the northside terrace for evening refreshments.\n",
"Location: [-71.082466, 42.347179]\n",
"\n"
]
}
],
"source": [
"from azure.search.documents.models import VectorizableTextQuery\n",
"\n",
"query = \"hotel in new york with pool and gym\"\n",
"coordinates = [-73.935242, 40.730610]  # New York City coordinates\n",
"# The POINT literal in the sort expression below takes the first coordinate, then the second.\n",
"longitude, latitude = coordinates\n",
"\n",
"# The same text is used for the keyword search and for the vector query over DescriptionEmbedding.\n",
"description_vector_query = VectorizableTextQuery(text=query, k_nearest_neighbors=50, fields=\"DescriptionEmbedding\")\n",
"\n",
"# Ascending order on geo.distance puts the hotels closest to the reference point first.\n",
"distance_sort = f\"geo.distance(Location, geography'POINT({longitude} {latitude})') asc\"\n",
"\n",
"results = client.search(\n",
"    search_text=query,\n",
"    vector_queries=[description_vector_query],\n",
"    select=\"HotelId,HotelName,Description,Location\",\n",
"    top=5,\n",
"    order_by=distance_sort,\n",
")\n",
"\n",
"# Materialize the results before printing so nothing is printed if fetching fails.\n",
"hotels = list(results)\n",
"for hotel in hotels:\n",
"    print(f\"HotelId: {hotel['HotelId']}, HotelName: {hotel['HotelName']}, Description: {hotel['Description']}\")\n",
"    print(f\"Location: {hotel['Location']['coordinates']}\")\n",
"    print()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}