in extract_answers.py [0:0]
def process_answers(df: pd.DataFrame) -> pd.DataFrame:
    """Run every value of the ``answer`` column through the extraction workflow.

    Each input row yields exactly one output row. All input columns other
    than ``answer`` are copied through unchanged -- for rows whose extraction
    raised as well -- so every output row can be traced back to its input.

    Args:
        df: Input frame. Must contain an ``answer`` column; a missing column
            surfaces as ``KeyError`` on the first row.

    Returns:
        A new DataFrame with the columns ``original_answer``,
        ``extracted_answer``, ``extracted_feedback`` and
        ``extraction_success``, followed by the pass-through columns. Rows
        whose extraction raised additionally carry an ``error`` column with
        the exception message; ``extracted_answer`` is ``''`` on those rows.
        A pass-through column sharing one of these names overrides the
        derived value for that row.
    """
    results = []

    # The extraction targets do not depend on the row, so build them once.
    extraction_target = (ExprExtractionConfig(), LatexExtractionConfig())
    passthrough_columns = [col for col in df.columns if col != 'answer']

    for _, row in df.iterrows():
        answer = row['answer']
        # Collected before the try block so the failure path keeps them too.
        passthrough = {col: row[col] for col in passthrough_columns}

        try:
            extracted = parse(answer, extraction_config=extraction_target)
            count = len(extracted)

            if count in (1, 2):
                extracted_answer = extracted[0]
                # A second element, when present, is reported as feedback.
                feedback = extracted[1] if count == 2 else None
            else:
                # Empty results -- and any result with more than two
                # elements -- are reported as a failed extraction.
                # NOTE(review): confirm parse() never returns more than two
                # elements for a usable extraction.
                extracted_answer = None
                feedback = "No valid extraction found"

            result = {
                'original_answer': answer,
                'extracted_answer': serialize_sympy_object(extracted_answer),
                'extracted_feedback': feedback,
                'extraction_success': extracted_answer is not None,
            }
        except Exception as e:
            # Best effort by design: one bad answer must not abort the whole
            # batch. The failure is recorded on the row instead of raised.
            result = {
                'original_answer': answer,
                'extracted_answer': '',
                'extracted_feedback': None,
                'extraction_success': False,
                'error': str(e),
            }

        result.update(passthrough)
        results.append(result)

    return pd.DataFrame(results)