def search()

in supporting-blog-content/ElasticDocs_GPT/elasticdocs_gpt-summarize5.py [0:0]


def search(query_text, size):
    """Run a hybrid (BM25 + kNN) search against the Elastic docs indices.

    Connection credentials are read from the ``cloud_id``, ``cloud_user`` and
    ``cloud_pass`` environment variables. The product checkboxes stored in
    ``st.session_state["checkboxes"]`` are turned into a ``terms`` filter that
    is applied to both the lexical query and the kNN clause.

    Args:
        query_text: Text to search for; used for the ``match`` clauses and as
            the input of the text-embedding query vector builder.
        size: Maximum number of hits to return.

    Returns:
        The response returned by ``es.search``.

    Raises:
        KeyError: If one of the credential environment variables or the
            ``"checkboxes"`` session-state entry is missing.
    """
    cloud_id = os.environ["cloud_id"]
    cloud_pass = os.environ["cloud_pass"]
    cloud_user = os.environ["cloud_user"]
    es = es_connect(cloud_id, cloud_user, cloud_pass)

    # Elasticsearch query (BM25) and kNN configuration for hybrid search
    query = {
        "bool": {
            "should": [
                {
                    "match": {
                        "title": {"query": query_text, "boost": 1, "analyzer": "stop"}
                    }
                },
                {"match": {"body_content": {"query": query_text, "boost": 2}}},
                {"match": {"product_name.stem": {"query": query_text, "boost": 5}}},
            ],
            # Only documents that carry a title embedding take part in the search.
            "filter": [{"exists": {"field": "title-vector"}}],
        }
    }

    knn = {
        "field": "title-vector",
        "k": 1,
        "num_candidates": 20,
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "sentence-transformers__all-distilroberta-v1",
                "model_text": query_text,
            }
        },
        "boost": 1,
    }

    # Compile the list of product filters from the checkboxes in the UI.
    # Slots that are still None (not populated yet) are skipped one by one,
    # instead of comparing the whole list against a fixed `[None] * 10`, so a
    # partially filled list or a list of another length no longer raises
    # TypeError on `None["state"]`.
    product_filters = [
        box["name"]
        for box in st.session_state["checkboxes"]
        if box is not None and box["state"]
    ]

    if product_filters:
        # Apply the same product restriction to the lexical query and to kNN;
        # each clause gets its own dict so they can be modified independently.
        query["bool"]["filter"].append(
            {"terms": {"product_name.enum": product_filters}}
        )
        knn["filter"] = {"terms": {"product_name.enum": product_filters}}

    # Global aggregation: product facet counts that ignore the product filter
    # applied to the main query.
    # NOTE(review): the facet filter matches the literal text "how" rather than
    # `query_text`; this looks like a leftover -- confirm before changing, the
    # UI may rely on it.
    agg = {
        "all_products": {
            "global": {},
            "aggs": {
                "filtered": {
                    "filter": {
                        "bool": {
                            "must": [
                                {
                                    "match": {
                                        "title": {
                                            "query": "how",
                                            "boost": 1,
                                            "analyzer": "stop",
                                        }
                                    }
                                }
                            ],
                            "filter": [{"exists": {"field": "title-vector"}}],
                        }
                    },
                    "aggs": {
                        "products": {
                            "terms": {"field": "product_name.enum", "size": 10}
                        }
                    },
                }
            },
        }
    }

    fields = ["title", "body_content", "url", "product_name"]
    index = "search-elastic-docs,search-elastic-docs-2"
    # `source=False`: only the explicitly requested `fields` are returned.
    return es.search(
        index=index,
        query=query,
        knn=knn,
        fields=fields,
        size=size,
        source=False,
        aggs=agg,
    )