in src/open-r1-multimodal/local_scripts/prepare_hf_data.py [0:0]
def extract_problem_solution(gpt4o_response):
    """Split a raw response string into a cleaned problem and a solution.

    The problem is the text that precedes the first ``<think>`` or
    ``<answer>`` tag, with a leading ``Question:`` label and a trailing
    ``Answer:`` label removed. The solution is a single ``<think>`` block
    holding every closed reasoning segment joined by spaces, followed by an
    ``<answer>`` block when the response contains an ``<answer>`` tag.

    Args:
        gpt4o_response: Response text, optionally containing
            ``<think>...</think>`` segments and an ``<answer>...</answer>``
            segment.

    Returns:
        A ``(problem, solution)`` tuple of strings. ``solution`` always
        starts with a ``<think>...</think>`` block, which is empty when the
        response has no closed reasoning segment.
    """
    # The problem ends at the first tag of either kind. Cutting at
    # "<answer>" too keeps the answer block out of the problem when the
    # response carries no "<think>" segment (or the answer comes first).
    problem = re.split(r"<think>|<answer>", gpt4o_response, maxsplit=1)[0].strip()
    # Remove "Question:" prefix if it exists
    problem = re.sub(r"^Question:\s*", "", problem)
    # Remove "Answer:" at the end of the problem
    problem = re.sub(r"\s*Answer:\s*$", "", problem).strip()

    # Combine all the reasoning steps into a single <think> block. A segment
    # opened with "<think>" but never closed is skipped.
    segments = gpt4o_response.split("<think>")[1:]
    think_parts = [
        segment.split("</think>")[0].strip()
        for segment in segments
        if "</think>" in segment
    ]
    solution = f"<think>{' '.join(think_parts)}</think>"

    # Add the final answer if it exists, removing "Answer:" prefix. When
    # several "<answer>" tags are present, the last one is used.
    if "<answer>" in gpt4o_response:
        final_answer = (
            gpt4o_response.split("<answer>")[-1].split("</answer>")[0].strip()
        )
        final_answer = re.sub(r"^Answer:\s*", "", final_answer)
        solution += f"\n\n<answer>{final_answer}</answer>"

    return problem, solution