demo-javascript/code/azure-search-vector-sample.js (258 lines of code) (raw):
const dotenv = require("dotenv");
const fs = require("fs");
const { SearchIndexClient, SearchClient } = require("@azure/search-documents");
const { DefaultAzureCredential } = require("@azure/identity")
const { AzureKeyCredential } = require("@azure/core-auth")
const { OpenAIClient } = require("@azure/openai");
const { promisify } = require('util');
const { Option, program } = require('commander');
/**
 * Command-line entry point.
 *
 * Parses the CLI flags and then runs, in this order, whichever stages were
 * requested: regenerate embeddings (--embed), create the index and upload the
 * documents (--upload), and issue a query (--query).
 *
 * A failing stage logs its own message, sets the process exit code to 1 so the
 * failure is visible to the calling shell, and stops the run; later stages are
 * not attempted.
 *
 * @returns {Promise<void>} Resolves once the requested stages have finished or
 *   the first failing stage has been reported.
 */
async function main() {
  program
    .option('-e, --embed', 'Recreate embeddings in text-sample.json')
    .option('-u, --upload', 'Upload embeddings and data in text-sample.json to the search index')
    .option('-q, --query <text>', 'Text of query to issue to search, if any')
    .addOption(new Option('-k, --query-kind <kind>', 'Kind of query to issue. Defaults to hybrid').default('hybrid').choices(['text', 'vector', 'hybrid']))
    .option('-c, --category-filter <category>', 'Category to filter results to')
    .option('-t, --include-title', 'Search over the title field as well as the content field')
    .option('--no-semantic-reranker', 'Do not use semantic reranker. Defaults to false')
    .parse();
  const options = program.opts();

  // Load environment variables from .env file. This runs before the credential
  // is constructed so that values kept in .env are already in process.env at
  // that point.
  dotenv.config({ path: "../.env" });
  const defaultCredential = new DefaultAzureCredential();

  // Generate document embeddings
  if (options.embed) {
    try {
      await generateDocumentEmbeddings(defaultCredential);
    } catch (err) {
      console.error(
        `Failed to generate embeddings: ${err.message}`
      );
      process.exitCode = 1;
      return;
    }
  }

  // Upload documents to Azure AI Search
  if (options.upload) {
    // Create Azure AI Search index
    try {
      await createSearchIndex(defaultCredential);
    } catch (err) {
      console.error(`Failed to create index: ${err.message}`);
      process.exitCode = 1;
      return;
    }

    try {
      await uploadDocuments(defaultCredential);
    } catch (err) {
      console.error(
        `Failed to upload documents to search index: ${err.message}`
      );
      process.exitCode = 1;
      return;
    }
  }

  // Query Azure AI Search
  if (options.query) {
    try {
      await queryDocuments(defaultCredential, options.query, options.queryKind, options.categoryFilter, options.includeTitle, options.semanticReranker);
    } catch (err) {
      console.error(
        `Failed to issue query to search index: ${err.message}`
      );
      process.exitCode = 1;
      return;
    }
  }
}
/**
 * Builds a SearchClient for the endpoint and index named by the
 * AZURE_SEARCH_SERVICE_ENDPOINT and AZURE_SEARCH_INDEX environment variables.
 *
 * When AZURE_SEARCH_ADMIN_KEY is set to a non-blank value the client
 * authenticates with that key; otherwise the supplied credential is used.
 *
 * @param {object} defaultCredential - Credential used when no admin key is configured.
 * @returns {SearchClient} The configured client.
 */
function createSearchClient(defaultCredential) {
  const {
    AZURE_SEARCH_SERVICE_ENDPOINT: endpoint,
    AZURE_SEARCH_ADMIN_KEY: adminKey,
    AZURE_SEARCH_INDEX: indexName,
  } = process.env;

  const hasAdminKey = Boolean(adminKey) && adminKey.trim() !== '';
  if (hasAdminKey) {
    return new SearchClient(endpoint, indexName, new AzureKeyCredential(adminKey));
  }
  return new SearchClient(endpoint, indexName, defaultCredential);
}
/**
 * Builds an OpenAIClient for the endpoint named by AZURE_OPENAI_ENDPOINT.
 *
 * When AZURE_OPENAI_KEY is set to a non-blank value the client authenticates
 * with that key; otherwise the supplied credential is used.
 *
 * @param {object} defaultCredential - Credential used when no key is configured.
 * @returns {OpenAIClient} The configured client.
 */
function createOpenAiClient(defaultCredential) {
  const { AZURE_OPENAI_ENDPOINT: endpoint, AZURE_OPENAI_KEY: apiKey } = process.env;

  const hasApiKey = Boolean(apiKey) && apiKey.trim() !== '';
  if (hasApiKey) {
    return new OpenAIClient(endpoint, new AzureKeyCredential(apiKey));
  }
  return new OpenAIClient(endpoint, defaultCredential);
}
// Promise-returning wrappers around the callback-based fs.readFile and
// fs.writeFile, so the functions below can use them with await.
const [readFileAsync, writeFileAsync] = [fs.readFile, fs.writeFile].map((callbackFn) => promisify(callbackFn));
/**
 * Adds embedding vectors to every record in ../data/text-sample.json.
 *
 * Reads the JSON array from disk, requests embeddings for all `title` values
 * and all `content` values from the deployment named by
 * AZURE_OPENAI_EMBEDDING_DEPLOYMENT, stores them on each record as
 * `titleVector` and `contentVector`, and writes the file back in place.
 *
 * @param {object} defaultCredential - Credential handed to createOpenAiClient.
 * @returns {Promise<void>}
 * @throws {Error} If either response does not contain exactly one embedding
 *   per record; the file is left untouched in that case.
 */
async function generateDocumentEmbeddings(defaultCredential) {
  const samplePath = "../data/text-sample.json";

  console.log("Reading data/text-sample.json...");
  const fileData = await readFileAsync(samplePath, "utf-8");
  const data = JSON.parse(fileData);

  console.log("Generating embeddings with Azure OpenAI...");
  const client = createOpenAiClient(defaultCredential);
  const openAiDeployment = process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT;
  const titles = data.map((item) => item.title);
  const contents = data.map((item) => item.content);

  // The two requests do not depend on each other, so issue them together
  // instead of waiting for the first before starting the second.
  const [titleEmbeddings, contentEmbeddings] = await Promise.all([
    client.getEmbeddings(openAiDeployment, titles),
    client.getEmbeddings(openAiDeployment, contents),
  ]);

  // Embeddings are matched to records by position, so a count mismatch would
  // otherwise surface as an opaque TypeError (or silently misalign vectors).
  const responses = [
    ["title", titleEmbeddings],
    ["content", contentEmbeddings],
  ];
  for (const [label, response] of responses) {
    if (response.data.length !== data.length) {
      throw new Error(
        `Expected ${data.length} ${label} embeddings but received ${response.data.length}`
      );
    }
  }

  data.forEach((item, i) => {
    item.titleVector = titleEmbeddings.data[i].embedding;
    item.contentVector = contentEmbeddings.data[i].embedding;
  });

  await writeFileAsync(samplePath, JSON.stringify(data, null, 2));
  console.log("Wrote embeddings to data/text-sample.json");
}
/**
 * Creates (or updates) the search index named by AZURE_SEARCH_INDEX on the
 * service at AZURE_SEARCH_SERVICE_ENDPOINT.
 *
 * The index has `id`, `title`, `content` and `category` string fields, two
 * vector fields (`titleVector`, `contentVector`) that share one HNSW profile,
 * and a semantic configuration named "my-semantic-config".
 *
 * The vector dimension count comes from AZURE_OPENAI_EMBEDDING_DIMENSIONS,
 * read as a base-10 integer; a missing, non-numeric or non-positive value
 * falls back to 1536.
 *
 * Authenticates with AZURE_SEARCH_ADMIN_KEY when it is set to a non-blank
 * value, otherwise with the supplied credential.
 *
 * @param {object} defaultCredential - Credential used when no admin key is configured.
 * @returns {Promise<void>}
 */
async function createSearchIndex(defaultCredential) {
  const searchServiceEndpoint = process.env.AZURE_SEARCH_SERVICE_ENDPOINT;
  const searchServiceApiKey = process.env.AZURE_SEARCH_ADMIN_KEY;
  const searchIndexName = process.env.AZURE_SEARCH_INDEX;

  // Explicit radix: the setting is a decimal count, so prefixes such as "0x"
  // must not switch the parse to another base.
  const embeddingDimensions = Number.parseInt(process.env.AZURE_OPENAI_EMBEDDING_DIMENSIONS, 10);
  const vectorSearchDimensions =
    Number.isNaN(embeddingDimensions) || embeddingDimensions <= 0 ? 1536 : embeddingDimensions;

  const credential =
    !searchServiceApiKey || searchServiceApiKey.trim() === ''
      ? defaultCredential
      : new AzureKeyCredential(searchServiceApiKey);

  const indexClient = new SearchIndexClient(searchServiceEndpoint, credential);

  // Both vector fields are configured identically apart from their name.
  const vectorField = (name) => ({
    name,
    type: "Collection(Edm.Single)",
    searchable: true,
    vectorSearchDimensions,
    vectorSearchProfileName: "myHnswProfile",
  });

  const index = {
    name: searchIndexName,
    fields: [
      {
        name: "id",
        type: "Edm.String",
        key: true,
        sortable: true,
        filterable: true,
        facetable: true,
      },
      { name: "title", type: "Edm.String", searchable: true },
      { name: "content", type: "Edm.String", searchable: true },
      {
        name: "category",
        type: "Edm.String",
        filterable: true,
        searchable: true,
      },
      vectorField("titleVector"),
      vectorField("contentVector"),
    ],
    vectorSearch: {
      algorithms: [{ name: "myHnswAlgorithm", kind: "hnsw" }],
      profiles: [
        {
          name: "myHnswProfile",
          algorithmConfigurationName: "myHnswAlgorithm",
        },
      ],
    },
    semanticSearch: {
      configurations: [
        {
          name: "my-semantic-config",
          prioritizedFields: {
            contentFields: [{ name: "content" }],
            keywordsFields: [{ name: "category" }],
            titleField: {
              name: "title",
            },
          },
        },
      ],
    },
  };

  console.log("Creating index...");
  await indexClient.createOrUpdateIndex(index);
}
/**
 * Sends every record in ../data/text-sample.json to the search index.
 *
 * Records are uploaded strictly in file order, each in its own
 * uploadDocuments call that is awaited before the next one starts.
 *
 * @param {object} defaultCredential - Credential handed to createSearchClient.
 * @returns {Promise<void>}
 */
async function uploadDocuments(defaultCredential) {
  console.log("Reading data/text-sample.json...");
  const documents = JSON.parse(await readFileAsync("../data/text-sample.json", "utf-8"));

  const indexWriter = createSearchClient(defaultCredential);

  console.log("Uploading documents to the index...");
  // Upload 1 document at a time
  let position = 0;
  while (position < documents.length) {
    await indexWriter.uploadDocuments([documents[position]]);
    position += 1;
  }

  console.log("Finished uploading documents");
}
/**
 * Runs one query against the search index and prints the top results.
 *
 * @param {object} defaultCredential - Credential handed to the client factories.
 * @param {string} query - The user's query text.
 * @param {string} queryKind - "text", "vector" or "hybrid". Text and hybrid
 *   send `query` as the search text; vector and hybrid also embed `query` and
 *   add a vector query. Vector-only searches use "*" as the search text.
 * @param {string} [categoryFilter] - When set, restricts results to documents
 *   whose `category` equals this value.
 * @param {boolean} [includeTitle] - When truthy, the vector query targets
 *   `titleVector` as well as `contentVector`.
 * @param {boolean} [semanticReranker] - When truthy, requests semantic
 *   ranking with extractive answers and captions and prints them.
 * @returns {Promise<void>}
 */
async function queryDocuments(defaultCredential, query, queryKind, categoryFilter, includeTitle, semanticReranker) {
  const searchClient = createSearchClient(defaultCredential);
  const openAiClient = createOpenAiClient(defaultCredential);
  const openAiDeployment = process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT;

  const usesText = queryKind === "text" || queryKind === "hybrid";
  const usesVector = queryKind === "vector" || queryKind === "hybrid";

  const options = {
    select: ["title", "content", "category"],
    top: 3
  };

  if (usesVector) {
    const embeddingResponse = await openAiClient.getEmbeddings(openAiDeployment, [query]);
    const embedding = embeddingResponse.data[0].embedding;
    const vectorFields = includeTitle ? ["contentVector", "titleVector"] : ["contentVector"];
    options.vectorSearchOptions = {
      queries: [
        {
          kind: "vector",
          vector: embedding,
          kNearestNeighborsCount: 50,
          fields: vectorFields
        }
      ]
    };
  }

  if (semanticReranker) {
    if (usesText) {
      options.queryType = "semantic";
    } else {
      // Vector-only search sends "*" as the search text, so the query text is
      // supplied separately here.
      options.semanticQuery = query;
    }
    options.semanticSearchOptions = {
      answers: {
        answerType: "extractive"
      },
      captions: {
        captionType: "extractive"
      },
      configurationName: "my-semantic-config",
    };
  }

  if (categoryFilter) {
    // The value comes from the command line. Inside an OData string literal a
    // single quote is written as two single quotes; without this a category
    // such as "Kid's" yields a malformed (or altered) filter expression.
    const escapedCategory = String(categoryFilter).replace(/'/g, "''");
    options.filter = `category eq '${escapedCategory}'`;
  }

  const searchText = usesText ? query : "*";
  const response = await searchClient.search(searchText, options);

  if (semanticReranker) {
    // The response may carry no answers at all; treat that as an empty list.
    for await (const answer of response.answers || []) {
      if (answer.highlights) {
        console.log(`Semantic answer: ${answer.highlights}`);
      } else {
        console.log(`Semantic answer: ${answer.text}`);
      }
      console.log(`Semantic answer score: ${answer.score}\n`);
    }
  }

  for await (const result of response.results) {
    console.log('----');
    console.log(`Title: ${result.document.title}`);
    console.log(`Score: ${result.score}`);
    if (semanticReranker) {
      console.log(`Reranker Score: ${result.rerankerScore}`); // Reranker score is the semantic score
    }
    console.log(`Content: ${result.document.content}`);
    console.log(`Category: ${result.document.category}`);
    // Only the first caption is printed; an empty captions array prints none.
    const caption = result.captions && result.captions[0];
    if (caption) {
      if (caption.highlights) {
        console.log(`Caption: ${caption.highlights}`);
      } else {
        console.log(`Caption: ${caption.text}`);
      }
    }
    console.log('----');
    console.log(`\n`);
  }
}
// Start the CLI. main() reports failures of its own stages; this handler
// covers anything that rejects outside them (for example while parsing
// arguments or constructing the credential) so the promise is never left
// unhandled and the process exits with a failure code.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});