def retrieve()

in experiments/legacy/backend/attributes.py [0:0]


def retrieve(
    desc: str,
    category: Optional[str] = None,
    image: Optional[str] = None,
    base64: bool = False,
    num_neighbors: int = config.NUM_NEIGHBORS,
    filters: Optional[list[str]] = None) -> list[dict]:
    """Returns list of attributes based on nearest neighbors.

    Embeds the provided desc and (optionally) image and returns the attributes
    corresponding to the closest products in embedding space.

    Args:
        desc: user provided description of product
        category: category of the product. Currently not read by this
          function.
        image: can be local file path, GCS URI or base64 encoded image
        base64: True indicates image is base64. False (default) will be
          interpreted as image path (either local or GCS)
        num_neighbors: number of nearest neighbors to return for EACH
          embedding. NOTE(review): this value is currently not forwarded to
          the nearest-neighbor lookup -- confirm whether it should be.
        filters: category prefix to restrict results to. None (default) is
          treated as an empty list, i.e. no filters are passed on.

    Returns:
        List of candidates sorted by embedding distance (ascending). Empty
        list if the lookup yields no neighbors. Each candidate is a dict with
        the following keys:
            attributes: attributes in dict form e.g. {'color':'green', 'pattern': 'striped'}
            description: string describing product
            id: neighbor ID exactly as returned by the lookup (the trailing
              two-character suffix is NOT stripped)
            distance: embedding distance in range [0,1], 0 being the closest match
    """
    # Use a fresh list per call instead of a shared mutable default argument.
    active_filters = [] if filters is None else filters

    res = embeddings.embed(desc, image, base64)
    embeds = [res.text_embedding]
    if res.image_embedding:
        embeds.append(res.image_embedding)

    neighbors = nearest_neighbors.get_nn(embeds, active_filters)
    if not neighbors:
        return []

    # The last 2 chars of a neighbor ID are not part of the product ID.
    # Presumably a marker for which embedding matched -- TODO confirm.
    suffix_len = 2
    ids = [n.id[:-suffix_len] for n in neighbors]
    attributes_desc = join_attributes_desc(ids)

    candidates = []
    for n, product_id in zip(neighbors, ids):
        record = attributes_desc[product_id]
        candidates.append({
            'attributes': record['attributes'],
            'description': record['description'],
            'id': n.id,
            'distance': n.distance,
        })
    return sorted(candidates, key=lambda d: d['distance'])