in src/tab_title_tuning_data.py [0:0]
def get_meta_info_for_task(self, one_test, task_id, max_docs):
    """Sample titles for one task and attach keywords or a description.

    Args:
        one_test: DataFrame with at least ``task_id`` and ``title`` columns;
            a ``description`` column is optional.
        task_id: Value compared against ``one_test.task_id`` to select rows.
        max_docs: Upper bound on the number of titles returned. It is applied
            as a slice end, so ``None`` keeps every title.

    Returns:
        A dict with three keys:
            "documents": the sampled titles, in shuffled order (shuffling
                uses the module-level ``random`` state).
            "keywords": at most three entries from
                ``KeyDocumentFinder.get_keywords_for_group`` when more than
                one title was sampled, otherwise ``[]``.
            "descriptions": a one-element list holding the description of
                the sampled row when exactly one title was sampled and a
                ``description`` column exists, otherwise ``None``.
    """
    docs_df = one_test[one_test.task_id == task_id].reset_index(drop=True)
    titles = docs_df["title"].to_list()

    # Shuffle row positions instead of the titles themselves so that a
    # sampled title can still be paired with the other columns of its own
    # row. random.shuffle picks its swaps from the list length alone, so the
    # resulting title order is the same as shuffling the titles directly.
    order = list(range(len(titles)))
    random.shuffle(order)
    order = order[:max_docs]
    docs = [titles[pos] for pos in order]

    keywords = []
    descriptions = None
    if len(docs) > 1:
        # keyword generator; it is handed every row of the task (docs_df),
        # not only the sampled titles.
        key_doc_finder_ai = KeyDocumentFinder(docs_df, "task_id", "title")
        key_doc_finder_ai.compute_all(include_embeddings=False)
        keywords = key_doc_finder_ai.get_keywords_for_group(task_id)[:3]
    elif len(docs) == 1 and "description" in docs_df.columns:
        # Take the description from the sampled row; reading row 0 would
        # mismatch the title whenever the task has more rows than max_docs.
        descriptions = [docs_df["description"].to_list()[order[0]]]

    return {"documents": docs, "keywords": keywords, "descriptions": descriptions}