def convert_dict_to_df()

in evaluation_pipeline/retrieval.py [0:0]


def convert_dict_to_df(retrieval_dict, query_lookup, ground_truth, ground_truth_urls, model_name, k):
    """Flatten per-query retrieval results into a wide pandas DataFrame.

    Every entry of ``retrieval_dict`` becomes one row. The i-th retrieval of
    a query (1-based) contributes the columns ``retrieval_{i}_id``,
    ``retrieval_{i}_title``, ``retrieval_{i}_url``,
    ``retrieval_{i}_combined_text`` and ``retrieval_{i}_distance``; a field
    that a retrieval does not provide is stored as ``None``.

    Args:
        retrieval_dict: Mapping of query id to an iterable of retrieval
            records. Each record must expose ``.get(key)``.
        query_lookup: Indexed by query id; the value fills the ``query`` column.
        ground_truth: Indexed by query id; the value fills ``relevant_docs``.
        ground_truth_urls: Indexed by query id; the value fills
            ``relevant_urls``.
        model_name: Stored unchanged in the ``model_name`` column of every row.
        k: Stored unchanged in the ``k`` column of every row.

    Returns:
        A DataFrame with one row per key of ``retrieval_dict``. Besides the
        per-retrieval columns it holds ``query_id`` (the key converted with
        ``str``), ``retrieved_ids``, ``retrieved_distances``, ``model_name``,
        ``query``, ``relevant_docs``, ``relevant_urls`` and ``k``.

    Raises:
        Whatever the lookup objects raise for a missing query id
        (``KeyError`` for plain dicts).
    """
    per_hit_fields = ('id', 'title', 'url', 'combined_text', 'distance')

    records = []
    for query_id, retrievals in retrieval_dict.items():
        # The row stores the stringified id, but the lookups below are
        # indexed with the original key.
        record = {'query_id': str(query_id)}
        hit_ids = []
        hit_distances = []

        for rank, hit in enumerate(retrievals, start=1):
            values = {field: hit.get(field) for field in per_hit_fields}
            # One column per (rank, field) pair, in per_hit_fields order.
            record.update(
                (f'retrieval_{rank}_{field}', value)
                for field, value in values.items()
            )
            hit_ids.append(values['id'])
            hit_distances.append(values['distance'])

        # Aggregates and per-query metadata come after the per-hit columns.
        record.update({
            'retrieved_ids': hit_ids,
            'retrieved_distances': hit_distances,
            'model_name': model_name,
            'query': query_lookup[query_id],
            'relevant_docs': ground_truth[query_id],
            'relevant_urls': ground_truth_urls[query_id],
            'k': k,
        })
        records.append(record)

    return pd.DataFrame(records)