in src/open-r1-multimodal/src/open_r1/grpo_rec.py [0:0]
def accuracy_reward(completions, solution, **kwargs):
    """Score each completion's ``<answer>`` text against its reference.

    For every (completion, reference) pair, the text inside the first
    ``<answer>...</answer>`` span of the completion is extracted, stripped,
    and scored as the mean of ``bleu(answer, reference)`` and
    ``rouge(answer, reference)``. Completions without an ``<answer>`` span
    receive ``0.0``.

    Args:
        completions: Sequence of completions; each one is indexed as
            ``completion[0]["content"]`` to obtain the generated text.
        solution: Reference answers, paired positionally with
            ``completions`` (pairs beyond the shorter input are dropped
            by ``zip``).
        **kwargs: Accepted for call-site compatibility; unused here.

    Returns:
        A list with one reward per scored (completion, reference) pair.
    """
    contents = [completion[0]["content"] for completion in completions]

    # Debug dump of the first response, emitted once per job. A process
    # without an initialised process group is treated as the main process;
    # calling get_rank() there would raise instead of returning a rank.
    # NOTE(review): assumes `dist` is torch.distributed -- confirm import.
    distributed_ready = dist.is_available() and dist.is_initialized()
    is_main_process = (not distributed_ready) or dist.get_rank() == 0
    if is_main_process and contents:
        print(
            "\n******************************************************response***************************************************\n",
            contents[0],
            "\n******************************************************response***************************************************\n".replace("*", "-"),
        )

    # DOTALL lets an answer span multiple lines; the non-greedy group stops
    # at the first closing tag.
    answer_re = re.compile(r'<answer>(.*?)</answer>', re.DOTALL)

    rewards = []
    for content, sol in zip(contents, solution):
        reward = 0.0
        answer_match = answer_re.search(content)
        if answer_match:
            content_answer = answer_match.group(1).strip()
            reward = (bleu(content_answer, sol) + rouge(content_answer, sol)) / 2
        rewards.append(reward)
    return rewards