def query_es_index()

in longform-qa/lfqa_utils.py [0:0]


def query_es_index(question, es_client, index_name="english_wiki_kilt_snippets_100w", n_results=10, min_length=20):
    """Retrieve supporting passages for a question from an ElasticSearch index.

    The question is lower-cased and stripped of common question words before
    being sent as a ``multi_match`` / ``cross_fields`` query over the
    ``article_title``, ``section_title`` and ``passage_text^2`` fields.

    Args:
        question: Natural-language question string.
        es_client: Object exposing ``search(index=..., body=...)`` and
            returning a mapping with hits under ``["hits"]["hits"]``.
        index_name: Name of the index to query.
        n_results: Maximum number of passages to return.
        min_length: Passages must contain strictly more than this many
            whitespace-separated tokens to be kept.

    Returns:
        A ``(support_doc, res_list)`` tuple. ``res_list`` holds one dict per
        kept passage: every ``_source`` field plus ``passage_id`` (the hit's
        ``_id``), ``score`` (the hit's ``_score``) and ``passage_text``.
        ``support_doc`` is the text of exactly those passages, each prefixed
        by ``"<P> "``; it is ``"<P> "`` alone when nothing is kept.
    """
    # Question words and the "eli5" tag are dropped from the query. Only
    # whole whitespace-delimited tokens are removed ("blue?" is kept as is).
    banned = {"how", "why", "what", "where", "which", "do", "does", "is", "?", "eli5", "eli5:"}
    q = " ".join(w for w in question.lower().split() if w not in banned)
    response = es_client.search(
        index=index_name,
        body={
            "query": {
                "multi_match": {
                    "query": q,
                    "fields": ["article_title", "section_title", "passage_text^2"],
                    "type": "cross_fields",
                }
            },
            # Over-fetch so that n_results passages can survive the length filter.
            "size": 2 * n_results,
        },
    )
    hits = response["hits"]["hits"]

    res_list = []
    for hit in hits:
        source = hit["_source"]
        passage_text = source["passage_text"]
        if len(passage_text.split()) <= min_length:
            continue
        res = {k: source[k] for k in source if k != "passage_text"}
        res["passage_id"] = hit["_id"]
        res["score"] = hit["_score"]
        # Re-added last so passage_text is the final key, as before.
        res["passage_text"] = passage_text
        res_list.append(res)
    res_list = res_list[:n_results]

    # Build the support document from the passages actually returned, so the
    # two return values always describe the same set of passages (previously
    # the document also contained the short and surplus over-fetched hits).
    support_doc = "<P> " + " <P> ".join(res["passage_text"] for res in res_list)
    return support_doc, res_list