in evaluation_pipeline/retrieval.py [0:0]
def convert_dict_to_df(retrieval_dict, query_lookup, ground_truth, ground_truth_urls, model_name, k) -> pd.DataFrame:
    """Flatten per-query retrieval results into a one-row-per-query DataFrame.

    Each entry of ``retrieval_dict`` becomes one row containing:

    * ``query_id`` -- the key converted with ``str()``.
    * ``retrieval_{i}_id``, ``retrieval_{i}_title``, ``retrieval_{i}_url``,
      ``retrieval_{i}_combined_text``, ``retrieval_{i}_distance`` -- the
      fields of the i-th retrieval (1-based), read with ``.get`` so a
      missing field is stored as ``None``.
    * ``retrieved_ids`` / ``retrieved_distances`` -- lists of every
      retrieval's ``id`` / ``distance`` in their original order.
    * ``model_name``, ``k`` -- the arguments, stored verbatim on every row.
    * ``query``, ``relevant_docs``, ``relevant_urls`` -- looked up by the
      original (un-stringified) ``query_id``.

    Args:
        retrieval_dict: Mapping of query id to an iterable of dict-like
            retrievals. A value of ``None`` is treated as "no retrievals".
        query_lookup: Indexable by query id; supplies the ``query`` column.
        ground_truth: Indexable by query id; supplies ``relevant_docs``.
        ground_truth_urls: Indexable by query id; supplies ``relevant_urls``.
        model_name: Value written to the ``model_name`` column.
        k: Value written to the ``k`` column. It is only recorded; this
            function does not truncate the retrievals to ``k`` items.

    Returns:
        A DataFrame with one row per key of ``retrieval_dict``. Rows with
        fewer retrievals than the widest row get missing values in the
        extra ``retrieval_{i}_*`` columns.

    Raises:
        KeyError: If a query id is absent from ``query_lookup``,
            ``ground_truth`` or ``ground_truth_urls`` (for dict inputs).
    """
    rows = []
    for query_id, retrievals in retrieval_dict.items():
        # A query with no result list at all is handled like an empty one
        # instead of crashing ``enumerate`` with a TypeError.
        if retrievals is None:
            retrievals = ()

        row = {'query_id': str(query_id)}
        retrieved_ids = []
        retrieved_distances = []

        # Spread each retrieval over its own numbered group of columns and,
        # in parallel, collect the ids/distances as per-row lists.
        for i, retrieval in enumerate(retrievals, start=1):
            doc_id = retrieval.get('id')
            distance = retrieval.get('distance')
            row[f'retrieval_{i}_id'] = doc_id
            row[f'retrieval_{i}_title'] = retrieval.get('title')
            row[f'retrieval_{i}_url'] = retrieval.get('url')
            row[f'retrieval_{i}_combined_text'] = retrieval.get('combined_text')
            row[f'retrieval_{i}_distance'] = distance
            retrieved_ids.append(doc_id)
            retrieved_distances.append(distance)

        # Per-query metadata goes after the numbered columns so the column
        # order of the resulting frame stays the same as before.
        row['retrieved_ids'] = retrieved_ids
        row['retrieved_distances'] = retrieved_distances
        row['model_name'] = model_name
        row['query'] = query_lookup[query_id]
        row['relevant_docs'] = ground_truth[query_id]
        row['relevant_urls'] = ground_truth_urls[query_id]
        row['k'] = k
        rows.append(row)

    return pd.DataFrame(rows)