def format_reward()

in src/open-r1-multimodal/src/open_r1/grpo.py [0:0]

5 lines of code
5 McCabe index (conditional complexity)


def format_reward(completions, **kwargs):
    """Reward completions that follow the ``<think>…</think><answer>…</answer>`` format.

    Each completion is scored independently: 1.0 when its text starts with a
    ``<think>`` block, optionally followed by whitespace, then an ``<answer>``
    block; 0.0 otherwise.

    Args:
        completions: Iterable of completions. The text of each one is read
            from ``completion[0]["content"]``.
        **kwargs: Accepted and ignored, so the function can be called with
            extra keyword arguments.

    Returns:
        A list of floats (1.0 or 0.0), one per completion, in input order.
    """
    # DOTALL lets `.` span newlines; without it any multi-line reasoning or
    # answer would be scored 0.0 even though it is correctly formatted.
    pattern = re.compile(
        r"<think>.*?</think>\s*<answer>.*?</answer>", re.DOTALL
    )
    # `match` anchors only at the start of the text, so content after
    # `</answer>` is tolerated while anything before `<think>` is not.
    return [
        1.0 if pattern.match(completion[0]["content"]) else 0.0
        for completion in completions
    ]