in tools/issue-labeler/src/SearchIndexCreator/IssueIndex.cs [177:325]
/// <summary>
/// Builds the definition of the issue search index. The index is named by the
/// <c>IssueIndexName</c> configuration value and carries:
/// an HNSW vector-search profile (with binary quantization compression and an
/// Azure OpenAI vectorizer), a semantic configuration, the field schema, and
/// the <c>CodeOwnerBoost</c> scoring profile.
/// </summary>
/// <returns>The populated <see cref="SearchIndex"/> definition.</returns>
private SearchIndex GetSampleIndex()
{
    // Names that link the vector-search profile, algorithm, vectorizer and compression together.
    const string profileName = "issue-vector-profile";
    const string algorithmName = "issueHnsw";
    const string vectorizerName = "issueOpenAIVectorizer";
    const string semanticConfigName = "issue-semantic-config";
    const string compressionName = "issue-binary-compression";
    const int embeddingDimensions = 1536; // "Default" value

    // Non-searchable field of the given type with no other attributes set.
    static SearchField PlainField(string name, SearchFieldDataType type) =>
        new SearchField(name, type) { IsSearchable = false };

    var index = new SearchIndex(_config["IssueIndexName"]);

    // ---- Vector search -------------------------------------------------
    index.VectorSearch = new();
    var vectors = index.VectorSearch;

    var hnswProfile = new VectorSearchProfile(profileName, algorithmName);
    hnswProfile.VectorizerName = vectorizerName;
    hnswProfile.CompressionName = compressionName;
    vectors.Profiles.Add(hnswProfile);

    vectors.Algorithms.Add(new HnswAlgorithmConfiguration(algorithmName));

    // Deployment name and model name are both read from the same configuration key.
    var openAiVectorizer = new AzureOpenAIVectorizer(vectorizerName);
    openAiVectorizer.Parameters = new AzureOpenAIVectorizerParameters()
    {
        ResourceUri = new Uri(_config["OpenAIEndpoint"]),
        DeploymentName = _config["EmbeddingModelName"],
        ModelName = _config["EmbeddingModelName"]
    };
    vectors.Vectorizers.Add(openAiVectorizer);

    vectors.Compressions.Add(new BinaryQuantizationCompression(compressionName));

    // ---- Semantic search -----------------------------------------------
    index.SemanticSearch = new();
    index.SemanticSearch.Configurations.Add(
        new SemanticConfiguration(semanticConfigName, new()
        {
            TitleField = new SemanticField(fieldName: "Title"),
            ContentFields = { new SemanticField(fieldName: "chunk") },
            KeywordsFields =
            {
                new SemanticField(fieldName: "Service"),
                new SemanticField(fieldName: "Category")
            },
        }));

    // ---- Field schema (order preserved) --------------------------------
    var fields = index.Fields;

    // Document key.
    fields.Add(new SearchableField("chunk_id")
    {
        IsKey = true,
        IsFilterable = false,
        IsSortable = true,
        IsFacetable = false,
        AnalyzerName = LexicalAnalyzerName.Keyword
    });
    fields.Add(new SearchableField("parent_id")
    {
        IsFilterable = true,
        IsSortable = false,
        IsFacetable = false
    });
    fields.Add(new SearchableField("chunk"));

    // Embedding vector, bound to the HNSW profile declared above.
    fields.Add(new SearchField("text_vector", SearchFieldDataType.Collection(SearchFieldDataType.Single))
    {
        IsSearchable = true,
        VectorSearchDimensions = embeddingDimensions,
        VectorSearchProfileName = profileName
    });

    fields.Add(PlainField("Id", SearchFieldDataType.String));
    fields.Add(new SearchableField("Title"));
    fields.Add(new SearchableField("Service") { IsFilterable = true });
    fields.Add(new SearchableField("Category") { IsFilterable = true });
    fields.Add(PlainField("Author", SearchFieldDataType.String));
    fields.Add(PlainField("Repository", SearchFieldDataType.String));
    fields.Add(PlainField("CreatedAt", SearchFieldDataType.DateTimeOffset));
    fields.Add(PlainField("Url", SearchFieldDataType.String));

    // 0 = false, 1 = true
    // Stored as a number so the magnitude boosting function can be applied to it.
    fields.Add(new SearchField("CodeOwner", SearchFieldDataType.Int32)
    {
        IsSearchable = false,
        IsSortable = false,
        IsFilterable = true
    });
    fields.Add(new SearchField("metadata_storage_last_modified", SearchFieldDataType.DateTimeOffset)
    {
        IsHidden = true,
        IsSearchable = false
    });

    // ---- Scoring profile -----------------------------------------------
    // Scoring boost for "Issue" objects that are comments made by the codeowner.
    // The boosting range starts and ends at 1.
    // NOTE(review): presumably this restricts the boost to CodeOwner == 1 — confirm against service behavior.
    var codeOwnerRange = new MagnitudeScoringParameters(1, 1)
    {
        ShouldBoostBeyondRangeByConstant = false,
    };
    var codeOwnerFunction = new MagnitudeScoringFunction(
        fieldName: "CodeOwner",
        boost: 5.0, // Adjust the boost factor as needed
        parameters: codeOwnerRange)
    {
        Interpolation = ScoringFunctionInterpolation.Constant
    };

    var codeOwnerBoost = new ScoringProfile("CodeOwnerBoost");
    codeOwnerBoost.Functions.Add(codeOwnerFunction);
    index.ScoringProfiles.Add(codeOwnerBoost);

    return index;
}