supporting-blog-content/building-a-recipe-search-with-elasticsearch/search.py (84 lines of code) (raw):

# Runs the same query as a semantic search and as a lexical search against
# the "grocery-catalog-elser" index and prints both result sets side by side.

import pandas as pd

from elasticsearch_connection import ElasticsearchConnection

es_client = ElasticsearchConnection().get_client()

term = "seafood for grilling"
size = 5

INDEX_NAME = "grocery-catalog-elser"
RESULT_COLUMNS = ["name", "score"]


def format_text(description, line_length=120):
    """Truncate *description* to at most *line_length* words.

    Note that, despite its name, ``line_length`` is a word count, not a
    character count: the text is split on whitespace and the words counted.

    Args:
        description: Text to shorten.
        line_length: Maximum number of whitespace-separated words to keep.

    Returns:
        The original string unchanged when it has ``line_length`` words or
        fewer; otherwise the first ``line_length`` words joined by single
        spaces and followed by ``"..."``.
    """
    words = description.split()
    if len(words) <= line_length:
        return description
    return " ".join(words[:line_length]) + "..."


def _search(query, truncate_description):
    """Run *query* against the catalog index and flatten the hits.

    Args:
        query: Elasticsearch query body passed as ``query=`` to the client.
        truncate_description: When true, the description of each hit is
            shortened with ``format_text``; otherwise it is returned as is.

    Returns:
        A list of dicts with the keys ``"score"``, ``"name"`` and
        ``"description"``, one per hit, in the order returned by the client.
    """
    response = es_client.search(
        index=INDEX_NAME,
        size=size,
        # The embedding field is excluded from the returned _source.
        source_excludes="description_embedding",
        query=query,
    )

    rows = []
    for hit in response["hits"]["hits"]:
        source = hit["_source"]
        description = source["description"]
        if truncate_description:
            description = format_text(description)
        rows.append(
            {
                "score": hit["_score"],
                "name": format_text(source["name"], line_length=10),
                "description": description,
            }
        )
    return rows


def search_semantic(term):
    """Search the catalog with a ``semantic`` query on ``description_embedding``.

    Args:
        term: Free-text query string.

    Returns:
        A list of ``{"score", "name", "description"}`` dicts; the name is
        truncated to 10 words and the description to 120 words.
    """
    return _search(
        {"semantic": {"field": "description_embedding", "query": term}},
        truncate_description=True,
    )


def search_lexical(term):
    """Search the catalog with a ``multi_match`` query on name and description.

    Args:
        term: Free-text query string.

    Returns:
        A list of ``{"score", "name", "description"}`` dicts; the name is
        truncated to 10 words and the description is returned untruncated.
    """
    return _search(
        {"multi_match": {"query": term, "fields": ["name", "description"]}},
        truncate_description=False,
    )


def _results_frame(results, search_type):
    """Build a name/score DataFrame for *results*, tagged with *search_type*.

    An empty result list yields an empty frame that still has the expected
    columns, so the later concat and column selection keep working.
    """
    if results:
        frame = pd.DataFrame(results)[RESULT_COLUMNS]
    else:
        frame = pd.DataFrame(columns=RESULT_COLUMNS)
    frame["Search Type"] = search_type
    return frame


def main():
    """Run both searches for the module-level ``term`` and print a table."""
    df1 = _results_frame(search_semantic(term), "Semantic")
    df2 = _results_frame(search_lexical(term), "Lexical")

    tabela = pd.concat([df1, df2], axis=0).reset_index(drop=True)
    tabela = tabela[["Search Type", "name", "score"]]
    tabela.columns = ["Search Type", "Name", "Score"]

    # Left-justify the cells as strings; a width of 0 adds no padding.
    tabela["Search Type"] = tabela["Search Type"].astype(str).str.ljust(0)
    tabela["Name"] = tabela["Name"].astype(str).str.ljust(15)
    tabela["Score"] = tabela["Score"].astype(str).str.ljust(5)

    print(tabela.to_string(index=False))


if __name__ == "__main__":
    main()