def process_outputs_to_df()

in generation/llm_swarm_script.py [0:0]


def process_outputs_to_df(df):
    """Flatten per-sample completions into one row per question/answer pair.

    For every row of *df*, the ``'Task'`` entry is indexed for its
    ``'__key__'``, ``'Page count'`` and ``'Prompt ID'`` values, and the
    ``'Completion'`` entry is passed to ``extract_qa_pairs``. Each
    ``(question, answer)`` pair that comes back becomes one output row that
    carries the sample's metadata alongside the pair.

    Samples that yield no pairs contribute no rows; a notice is printed for
    each of them.

    Args:
        df: DataFrame with ``'Task'`` and ``'Completion'`` columns.
            NOTE(review): ``'Task'`` is assumed to hold a mapping (it is
            subscripted by string keys) -- confirm against the caller.

    Returns:
        A DataFrame with the columns ``'__key__'``, ``'Page count'``,
        ``'Prompt ID'``, ``'question'`` and ``'answer'``. The columns are
        present even when no pairs were extracted at all.
    """
    # Declared explicitly so that an empty result still has the full schema;
    # pd.DataFrame([]) on its own would produce a frame with no columns.
    columns = ['__key__', 'Page count', 'Prompt ID', 'question', 'answer']
    all_data = []

    for _, row in df.iterrows():
        task = row['Task']
        completion = row['Completion']

        sample_key = task['__key__']
        page_count = task['Page count']
        prompt_id = task['Prompt ID']

        qa_pairs = extract_qa_pairs(completion)
        if not qa_pairs:
            print('No Q&A pairs found for sample:', sample_key)

        all_data.extend(
            {
                '__key__': sample_key,
                'Page count': page_count,
                'Prompt ID': prompt_id,
                'question': question,
                'answer': answer,
            }
            for question, answer in qa_pairs
        )

    return pd.DataFrame(all_data, columns=columns)