in src/open-r1-multimodal/src/open_r1/grpo_rec.py [0:0]
def len_reward(completions, solution, L=500, gamma=0.9, **kwargs):
    """Score each completion by its length and punctuation density.

    For every completion the text is read from ``completion[0]["content"]``
    and scored as ``base * decay * (0.5 + 0.5 * density) / 500`` where:

    * ``density`` is the number of ``,``, ``.``, ``;`` and ``!`` characters
      divided by the text length (plus ``1e-6`` so empty text is safe);
    * ``decay`` is ``gamma ** (length // L)``;
    * ``base`` is ``length * (1 + sin(pi * length / (2 * L)))`` while
      ``length <= L`` and ``2 * L + log(length - L + 1)`` beyond that.

    Args:
        completions: Sequence of completions; each is indexed as
            ``completion[0]["content"]`` to obtain the text to score.
        solution: Unused by this reward.
        L: Length threshold separating the two ``base`` regimes and the
            step size of the exponential decay.
        gamma: Decay factor applied once per full multiple of ``L``.
        **kwargs: Accepted and ignored.

    Returns:
        A list with one float per completion, in input order.

    Note:
        The final scale is the fixed constant 500; it does not follow ``L``.
        ``math`` is expected to be imported at module level.
    """
    marks = ',.;!'
    texts = [entry[0]["content"] for entry in completions]
    # (character count, punctuation count) for every text, in input order.
    stats = [(len(text), sum(1 for ch in text if ch in marks)) for text in texts]

    def score(n_chars, n_marks):
        punct_density = n_marks / (n_chars + 1e-6)
        decay = gamma ** (n_chars // L)
        base = (
            n_chars * (1 + math.sin(math.pi * n_chars / (2 * L)))
            if n_chars <= L
            else 2 * L + math.log(n_chars - L + 1)
        )
        return base * decay * (0.5 + 0.5 * punct_density)

    raw_scores = [score(n_chars, n_marks) for n_chars, n_marks in stats]
    return [value / 500 for value in raw_scores]