in src/open_r1/rewards.py [0:0]
def get_code_format_reward(language: str = "python"):
    """Build a format reward function specifically for code responses.

    A completion earns the reward when it has the shape
    ``<think>\\n...\\n</think>\\n<answer>\\n...```<language>...```...\\n</answer>``,
    i.e. a reasoning section followed by an answer section that contains a
    fenced code block tagged with the expected language.

    Args:
        language: Programming language supported by E2B
            https://e2b.dev/docs/code-interpreting/supported-languages.
            Used as the default fence tag when the call does not supply a
            per-sample ``language`` keyword argument.

    Returns:
        A callable ``code_format_reward(completions, **kwargs)`` that returns
        one float per scored completion: ``1.0`` on a format match, else ``0.0``.
    """
    flags = re.DOTALL | re.MULTILINE

    def code_format_reward(completions, **kwargs):
        """Score each completion's first message against the code format.

        Args:
            completions: Sequence of completions; the text that is checked is
                ``completion[0]["content"]`` of each entry.
            **kwargs: If a ``language`` entry is present it is used as the
                per-sample list of fence tags instead of the default language.
                This way we can have mixed language training.

        Returns:
            List of ``1.0`` / ``0.0`` rewards, one per (completion, language) pair.
        """
        # If there is a language field, use it instead of the default language.
        languages = kwargs["language"] if "language" in kwargs else [language] * len(completions)
        completion_contents = [completion[0]["content"] for completion in completions]

        rewards = []
        for content, sample_language in zip(completion_contents, languages):
            # Escape the tag so names with regex metacharacters (e.g. "c++")
            # are matched literally instead of being parsed as regex syntax.
            fence_tag = re.escape(sample_language)
            # NOTE(review): with re.MULTILINE, `$` also matches at the end of any
            # line, so text on lines after `</answer>` does not prevent a match.
            pattern = rf"^<think>\n.*?\n</think>\n<answer>\n.*?```{fence_tag}.*?```.*?\n</answer>$"
            rewards.append(1.0 if re.match(pattern, content, flags) else 0.0)
        return rewards

    return code_format_reward