def len_reward()

in src/open-r1-multimodal/src/open_r1/grpo_rec.py [0:0]


def len_reward(completions, solution, L=500, gamma=0.9, **kwargs):
    """Score each completion by its character length and punctuation density.

    For every completion, the text is read from ``completion[0]["content"]``
    and a reward is built from three factors:

    * a length term: ``length * (1 + sin(pi * length / (2 * L)))`` while
      ``length <= L``, and ``2 * L + log(length - L + 1)`` beyond that, so
      growth past ``L`` is only logarithmic;
    * a decay term ``gamma ** (length // L)`` that shrinks the reward once
      for every full multiple of ``L`` characters;
    * a punctuation term ``0.5 + 0.5 * density`` where ``density`` is the
      share of characters that are one of ``, . ; !``.

    Args:
        completions: Sequence whose items are indexable, with item ``[0]``
            being a mapping that has a ``"content"`` entry.
        solution: Accepted but not used by this function.
        L: Length threshold separating the two regimes of the length term.
        gamma: Base of the per-``L`` decay factor.
        **kwargs: Accepted and ignored.

    Returns:
        A list with one reward per completion, in input order. Each raw
        reward is divided by the fixed constant 500 (independent of ``L``).
    """
    scaled_rewards = []
    for completion in completions:
        text = completion[0]["content"]
        n_chars = len(text)
        n_marks = sum(1 for ch in text if ch in ',.;!')

        # The small epsilon keeps the ratio defined for empty text.
        mark_ratio = n_marks / (n_chars + 1e-6)

        # One multiplicative gamma step per complete block of L characters.
        decay = gamma ** (n_chars // L)

        length_term = (
            n_chars * (1 + math.sin(math.pi * n_chars / (2 * L)))
            if n_chars <= L
            else 2 * L + math.log(n_chars - L + 1)
        )

        raw_reward = length_term * decay * (0.5 + 0.5 * mark_ratio)
        scaled_rewards.append(raw_reward / 500)

    return scaled_rewards