in Autogen_v0.4/rag_agent/search_helper.py [0:0]
def _vector_field(name: str, dimensions: int) -> dict:
    """Return a searchable vector field definition bound to ``amlHnswProfile``."""
    return {
        "name": name,
        "type": "Collection(Edm.Single)",
        "searchable": True,
        "dimensions": dimensions,
        "vectorSearchProfile": "amlHnswProfile",
    }


def _build_index_schema(index_name: str, analyzer_name: str, language_suffix: str) -> dict:
    """Return the index definition (fields, scoring, vector and semantic config).

    The text fields ``title``, ``content``, ``category`` and ``tags`` are
    suffixed with ``language_suffix`` and use ``analyzer_name``.

    The vectorizer settings are read from the names ``azure_openai_endpoint``,
    ``azure_openai_embedding__large_deployment``, ``embedding_model_name`` and
    ``azure_openai_key``, which are expected to be defined at module level
    (they are not defined inside this block).
    """
    title_field = f"title_{language_suffix}"
    content_field = f"content_{language_suffix}"
    category_field = f"category_{language_suffix}"
    tags_field = f"tags_{language_suffix}"

    fields = [
        {
            "name": "id",
            "type": "Edm.String",
            "key": True,
            "sortable": True,
            "filterable": True,
            "facetable": True,
        },
        {"name": "docName", "type": "Edm.String", "searchable": True},
        {"name": "pageNumber", "type": "Edm.String", "searchable": True},
        {
            "name": title_field,
            "type": "Edm.String",
            "analyzer": analyzer_name,
            "searchable": True,
        },
        {
            "name": content_field,
            "type": "Edm.String",
            "analyzer": analyzer_name,
            "searchable": True,
        },
        {
            "name": category_field,
            "type": "Collection(Edm.String)",
            "analyzer": analyzer_name,
            "filterable": True,
            "searchable": True,
        },
        {
            "name": tags_field,
            "type": "Collection(Edm.String)",
            "analyzer": analyzer_name,
            "filterable": True,
            "searchable": True,
        },
        {"name": "lastUpdated", "type": "Edm.DateTimeOffset"},
        # NOTE(review): all four vector fields share one profile whose
        # vectorizer uses the "large" embedding deployment, yet only
        # contentVector declares 3072 dimensions. Confirm the 1536-dimension
        # fields are populated and queried with matching embeddings.
        _vector_field("titleVector", 1536),
        _vector_field("contentVector", 3072),
        _vector_field("categoryVector", 1536),
        _vector_field("tagsVector", 1536),
    ]

    scoring_profiles = [
        {
            # Weights matches in the tags field 5x in text scoring.
            "name": "tagsBoost",
            "text": {"weights": {tags_field: 5}},
            "functions": [],
        },
        {
            # Freshness boost on lastUpdated over a 365-day window.
            "name": "newAndLatest",
            "functionAggregation": "sum",
            "functions": [
                {
                    "fieldName": "lastUpdated",
                    "interpolation": "quadratic",
                    "type": "freshness",
                    "boost": 10,
                    "freshness": {"boostingDuration": "P365D"},
                }
            ],
        },
    ]

    vector_search = {
        "algorithms": [
            {
                "name": "amlHnsw",
                "kind": "hnsw",
                "hnswParameters": {"m": 4, "metric": "cosine"},
            }
        ],
        "profiles": [
            {
                "name": "amlHnswProfile",
                "algorithm": "amlHnsw",
                "vectorizer": "amlVectorizer",
            }
        ],
        "vectorizers": [
            {
                "name": "amlVectorizer",
                "kind": "azureOpenAI",
                "azureOpenAIParameters": {
                    "resourceUri": azure_openai_endpoint,
                    "deploymentId": azure_openai_embedding__large_deployment,
                    "modelName": embedding_model_name,
                    "apiKey": azure_openai_key,
                },
            }
        ],
    }

    semantic = {
        "configurations": [
            {
                "name": "aml-semantic-config",
                "prioritizedFields": {
                    "titleField": {"fieldName": title_field},
                    "prioritizedKeywordsFields": [
                        {"fieldName": category_field},
                        {"fieldName": tags_field},
                    ],
                    "prioritizedContentFields": [
                        {"fieldName": content_field},
                    ],
                },
            }
        ],
    }

    return {
        "name": index_name,
        "fields": fields,
        "scoringProfiles": scoring_profiles,
        "suggesters": [
            {
                "name": "sg",
                "searchMode": "analyzingInfixMatching",
                "sourceFields": [title_field],
            }
        ],
        "vectorSearch": vector_search,
        "semantic": semantic,
    }


def _search_error(response, action: str):
    """Build a ``requests.HTTPError`` that carries the service's response body."""
    return requests.HTTPError(
        f"Azure AI Search {action} failed with HTTP {response.status_code}: {response.text}",
        response=response,
    )


def create_index(
    index_name: str,
    analyzer_name: str = "en.microsoft",
    language_suffix: str = "en",
    timeout: float = 30.0,
) -> None:
    """Create the search index ``index_name`` if it does not already exist.

    Issues a GET for the index; on HTTP 404 it PUTs the schema built by
    ``_build_index_schema`` and prints the service's JSON response. On HTTP
    200 it prints "Index already exists" and leaves the index untouched.

    Args:
        index_name: Name of the index; also used in the request URL.
        analyzer_name: Analyzer applied to the language-specific text fields.
        language_suffix: Suffix appended to the title/content/category/tags
            field names.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.HTTPError: If the existence check returns anything other
            than 200 or 404, or if the create request is rejected.
        requests.RequestException: On connection failure or timeout.
    """
    headers = {
        "Content-Type": "application/json",
        "api-key": os.getenv("AZURE_SEARCH_ADMIN_KEY", ""),
    }
    url = azure_search_endpoint + "/indexes/" + index_name + "?api-version=2024-07-01"

    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        print("Index already exists")
        return
    if response.status_code != 404:
        # Auth failures, throttling and server errors must not be reported
        # as "already exists".
        raise _search_error(response, "index lookup")

    index_schema = _build_index_schema(index_name, analyzer_name, language_suffix)
    response = requests.put(url, headers=headers, json=index_schema, timeout=timeout)
    if not response.ok:
        raise _search_error(response, "index creation")
    print(response.json())