def extract_problem_solution()

in src/open-r1-multimodal/local_scripts/prepare_hf_data.py [0:0]


def extract_problem_solution(gpt4o_response: str):
    """Split a tagged response string into a problem and a normalized solution.

    The problem is the text that precedes the first ``<think>`` or
    ``<answer>`` tag, with a leading ``Question:`` label and a trailing
    ``Answer:`` label removed. The solution merges every closed
    ``<think>...</think>`` segment into one ``<think>`` block (segments are
    joined with a single space) and, when an ``<answer>`` tag is present,
    appends the content of the last ``<answer>`` block with any leading
    ``Answer:`` label removed.

    Args:
        gpt4o_response: Response text, optionally containing ``<think>`` and
            ``<answer>`` tags.

    Returns:
        A ``(problem, solution)`` tuple of strings. ``solution`` always starts
        with a ``<think>`` block, which is empty when the response holds no
        closed ``<think>`` segment.
    """
    # The problem ends at the first reasoning *or* answer tag. Cutting at
    # "<answer>" too keeps the final answer from leaking into the problem
    # text when the response carries no <think> block at all.
    tag_positions = [
        pos
        for pos in (
            gpt4o_response.find("<think>"),
            gpt4o_response.find("<answer>"),
        )
        if pos != -1
    ]
    problem_end = min(tag_positions, default=len(gpt4o_response))
    problem = gpt4o_response[:problem_end].strip()
    # Remove "Question:" prefix if it exists
    problem = re.sub(r"^Question:\s*", "", problem)
    # Remove "Answer:" at the end of the problem
    problem = re.sub(r"\s*Answer:\s*$", "", problem).strip()

    # Combine all the reasoning steps into a single <think> block; a segment
    # without its closing tag is dropped rather than swallowing what follows.
    segments = gpt4o_response.split("<think>")[1:]
    think_parts = [
        segment.split("</think>")[0].strip()
        for segment in segments
        if "</think>" in segment
    ]
    solution = f"<think>{' '.join(think_parts)}</think>"

    # Add the final answer if it exists, removing "Answer:" prefix. Only the
    # last <answer> block is used.
    if "<answer>" in gpt4o_response:
        final_answer = (
            gpt4o_response.split("<answer>")[-1].split("</answer>")[0].strip()
        )
        final_answer = re.sub(r"^Answer:\s*", "", final_answer)
        solution += f"\n\n<answer>{final_answer}</answer>"

    return problem, solution