in generation/llm_swarm_script.py [0:0]
def process_outputs_to_df(df: "pd.DataFrame") -> "pd.DataFrame":
    """Flatten per-sample completions into one row per extracted Q&A pair.

    Args:
        df: Frame with a ``'Task'`` column (each value indexable by
            ``'__key__'``, ``'Page count'`` and ``'Prompt ID'``) and a
            ``'Completion'`` column that is passed to ``extract_qa_pairs``.

    Returns:
        A DataFrame with the columns ``'__key__'``, ``'Page count'``,
        ``'Prompt ID'``, ``'question'`` and ``'answer'``, one row per pair
        yielded by ``extract_qa_pairs``. The columns are present even when
        no pairs were extracted at all, so callers can always select them.

    Raises:
        KeyError: If a row lacks ``'Task'``/``'Completion'`` or a task lacks
            one of the three metadata keys.
    """
    columns = ['__key__', 'Page count', 'Prompt ID', 'question', 'answer']
    all_data = []

    # iterrows() is kept (rather than indexing the columns directly) so that
    # a frame with no rows and no columns is still accepted.
    for _, row in df.iterrows():
        task = row['Task']
        completion = row['Completion']
        sample_key = task['__key__']
        page_count = task['Page count']
        prompt_id = task['Prompt ID']

        # NOTE(review): extract_qa_pairs is defined elsewhere; it is assumed
        # to return a sized collection of (question, answer) pairs.
        qa_pairs = extract_qa_pairs(completion)
        if not qa_pairs:
            print('No Q&A pairs found for sample:', sample_key)

        all_data.extend(
            {
                '__key__': sample_key,
                'Page count': page_count,
                'Prompt ID': prompt_id,
                'question': question,
                'answer': answer,
            }
            for question, answer in qa_pairs
        )

    # Explicit columns keep the schema stable when all_data is empty;
    # pd.DataFrame([]) alone would produce a frame with no columns.
    return pd.DataFrame(all_data, columns=columns)