in train_rick/run_grpo.py [0:0]
def correctness_reward(completions, solutions, **kwargs):
    """Score each completion 1.0 if it contains an accepted answer, else 0.0.

    Args:
        completions: Sequence of completions. Each completion is indexed as
            ``completion[0]["content"]``, i.e. the text of its first message
            is what gets scored.
        solutions: Sequence aligned with ``completions``. Each entry is an
            iterable of accepted answer strings; a bare string is treated as
            a single accepted answer.
        **kwargs: Accepted and ignored, so callers may pass extra keyword
            arguments without error.

    Returns:
        A list of floats, one per (completion, solutions-entry) pair, in
        order. Pairing uses ``zip``, so the result is as long as the shorter
        of the two inputs.
    """
    rewards = []
    for completion, ground_truths in zip(completions, solutions):
        content = completion[0]["content"]
        # A bare string would otherwise be iterated character by character,
        # rewarding any completion that shares a single character with it.
        if isinstance(ground_truths, str):
            ground_truths = (ground_truths,)
        # Plain substring match; the generator lets any() stop at the first hit.
        matched = any(ground_truth in content for ground_truth in ground_truths)
        rewards.append(1.0 if matched else 0.0)
    return rewards