in longform-qa/lfqa_utils.py [0:0]
def query_es_index(question, es_client, index_name="english_wiki_kilt_snippets_100w", n_results=10, min_length=20):
q = question.lower()
banned = ["how", "why", "what", "where", "which", "do", "does", "is", "?", "eli5", "eli5:"]
q = " ".join([w for w in q.split() if w not in banned])
response = es_client.search(
index=index_name,
body={
"query": {
"multi_match": {
"query": q,
"fields": ["article_title", "section_title", "passage_text^2"],
"type": "cross_fields",
}
},
"size": 2 * n_results,
},
)
hits = response["hits"]["hits"]
support_doc = "<P> " + " <P> ".join([hit["_source"]["passage_text"] for hit in hits])
res_list = [dict([(k, hit["_source"][k]) for k in hit["_source"] if k != "passage_text"]) for hit in hits]
for r, hit in zip(res_list, hits):
r["passage_id"] = hit["_id"]
r["score"] = hit["_score"]
r["passage_text"] = hit["_source"]["passage_text"]
res_list = [res for res in res_list if len(res["passage_text"].split()) > min_length][:n_results]
return support_doc, res_list