in experiments/legacy/backend/nearest_neighbors.py [0:0]
def get_nn(
    embeds: list[list[float]],
    # Annotation is quoted so the `|` union is never evaluated at definition
    # time; the default is None rather than a shared mutable list.
    filters: "list[str] | None" = None,
    num_neighbors: int = config.NUM_NEIGHBORS,
) -> list[Neighbor]:
    """Fetch nearest neighbors from the vector store.

    All embeddings are passed to a single ``find_neighbors`` call and the
    per-embedding results are concatenated in the order they come back.
    The results are not de-duplicated, so an item matched by several
    embeddings appears once per match.

    Args:
        embeds: list of embeddings to find nearest neighbors for
        filters: category prefix to restrict results to. The i-th entry is
            paired with ``config.FILTER_CATEGORIES[i]``. Entries beyond
            ``config.CATEGORY_DEPTH`` are dropped and a warning is logged.
            ``None`` (the default) or an empty list sends no category
            filters.
            - example 1: ['Mens']
              will only return suggestions with top level category 'Mens'
            - example 2: ['Mens', 'Pants']
              will only return suggestions with top level category 'Mens'
              and second level category 'Pants'
        num_neighbors: number of nearest neighbors to request for EACH
            embedding

    Returns:
        A flat list of ``Neighbor`` objects, one per match, each built from
        the following attributes of the match:
            id: unique item identifier, usually used to join to a reference DB
            distance: the embedding distance
    """
    categories = [] if filters is None else filters

    if len(categories) > config.CATEGORY_DEPTH:
        # Single-line message with lazy %-args: the record stays on one line
        # and is only formatted if the warning is actually emitted.
        logging.warning(
            "Number of category filters %d is greater than supported "
            "category depth %d. Truncating",
            len(categories),
            config.CATEGORY_DEPTH,
        )
        categories = categories[: config.CATEGORY_DEPTH]

    # Position in the list selects the category level the value applies to.
    namespaces = [
        Namespace(config.FILTER_CATEGORIES[level], [value])
        for level, value in enumerate(categories)
    ]

    response = index_endpoint.find_neighbors(
        deployed_index_id=config.DEPLOYED_INDEX,
        queries=embeds,
        num_neighbors=num_neighbors,
        filter=namespaces,
    )

    # The response holds one collection of matches per query; flatten them.
    return [
        Neighbor(match.id, match.distance)
        for matches in response
        for match in matches
    ]