in src/open-r1-multimodal/src/open_r1/grpo_rec.py [0:0]
def format_reward(completions, **kwargs):
    """Score each completion on whether it follows the think/answer layout.

    A completion earns 1.0 only when its entire text, from first character
    to last, consists of a ``<think>...</think>`` section followed later by
    an ``<answer>...</answer>`` section; otherwise it earns 0.0.

    Args:
        completions: Iterable of completions. For each one, the text is
            read from ``completion[0]["content"]``.
        **kwargs: Accepted and ignored.

    Returns:
        A list of floats (1.0 or 0.0), one per completion, in input order.
    """
    # DOTALL lets "." cross newlines, so multi-line sections are accepted.
    layout = re.compile(r'<think>.*?</think>.*?<answer>.*?</answer>', re.DOTALL)

    # Pull out every text first, then score them.
    texts = []
    for completion in completions:
        texts.append(completion[0]["content"])

    # fullmatch anchors both ends: any text before <think> or after
    # </answer> (even a trailing newline) yields 0.0.
    return [1.0 if layout.fullmatch(text) is not None else 0.0 for text in texts]