in supporting-blog-content/ElasticDocs_GPT/elasticdocs_gpt-summarize5.py [0:0]
def search(query_text, size):
    """Run a hybrid (BM25 + kNN) search and return the raw response.

    Connection credentials are read from the ``cloud_id``, ``cloud_user``
    and ``cloud_pass`` environment variables and handed to ``es_connect``.
    Product names whose checkbox is ticked in
    ``st.session_state["checkboxes"]`` are applied as a ``terms`` filter to
    both the lexical query and the kNN clause.

    Args:
        query_text: Text to search for; used for the ``match`` clauses and
            as the ``model_text`` of the kNN query vector builder.
        size: Value passed through as the ``size`` argument of the search.

    Returns:
        Whatever ``es.search`` returns for the request. The request also
        asks for an ``all_products`` aggregation (see below).

    Raises:
        KeyError: If one of the three environment variables is not set.
    """
    cid = os.environ["cloud_id"]
    cp = os.environ["cloud_pass"]
    cu = os.environ["cloud_user"]
    es = es_connect(cid, cu, cp)

    vector_field = "title-vector"
    product_field = "product_name.enum"

    # Lexical (BM25) part of the hybrid search. Only documents that carry a
    # title vector are eligible.
    query = {
        "bool": {
            "should": [
                {
                    "match": {
                        "title": {
                            "query": query_text,
                            "boost": 1,
                            "analyzer": "stop",
                        }
                    }
                },
                {"match": {"body_content": {"query": query_text, "boost": 2}}},
                {
                    "match": {
                        "product_name.stem": {"query": query_text, "boost": 5}
                    }
                },
            ],
            "filter": [{"exists": {"field": vector_field}}],
        }
    }

    # Vector part of the hybrid search; the query text is embedded by the
    # model named in ``model_id``.
    knn = {
        "field": vector_field,
        "k": 1,
        "num_candidates": 20,
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "sentence-transformers__all-distilroberta-v1",
                "model_text": query_text,
            }
        },
        "boost": 1,
    }

    # Collect the product names ticked in the UI. Slots that are still None
    # are skipped one by one, so a partially populated checkbox list no
    # longer raises TypeError (an all-None list yields no filter, as before).
    selected_products = [
        box["name"]
        for box in st.session_state["checkboxes"]
        if box is not None and box["state"]
    ]
    if selected_products:
        # Restrict both halves of the hybrid search to the chosen products.
        query["bool"]["filter"].append(
            {"terms": {product_field: selected_products}}
        )
        knn["filter"] = {"terms": {product_field: selected_products}}

    # Global aggregation: product buckets are computed independently of the
    # main query and of the product filter above.
    # NOTE(review): the facet filter matches the literal text "how" rather
    # than ``query_text`` -- presumably to keep the product list stable
    # across queries; confirm this is intentional.
    agg = {
        "all_products": {
            "global": {},
            "aggs": {
                "filtered": {
                    "filter": {
                        "bool": {
                            "must": [
                                {
                                    "match": {
                                        "title": {
                                            "query": "how",
                                            "boost": 1,
                                            "analyzer": "stop",
                                        }
                                    }
                                }
                            ],
                            "filter": [{"exists": {"field": vector_field}}],
                        }
                    },
                    "aggs": {
                        "products": {
                            "terms": {"field": product_field, "size": 10}
                        }
                    },
                }
            },
        }
    }

    fields = ["title", "body_content", "url", "product_name"]
    index = "search-elastic-docs,search-elastic-docs-2"
    return es.search(
        index=index,
        query=query,
        knn=knn,
        fields=fields,
        size=size,
        source=False,
        aggs=agg,
    )