in src/open_r1/rewards.py [0:0]
def get_repetition_penalty_reward(ngram_size: int, max_penalty: float, language: str = "en"):
"""
Computes N-gram repetition penalty as described in Appendix C.2 of https://huggingface.co/papers/2502.03373.
Reference implementation from: https://github.com/eddycmu/demystify-long-cot/blob/release/openrlhf/openrlhf/reward/repetition.py
Args:
ngram_size: size of the n-grams
max_penalty: Maximum (negative) penalty for wrong answers
language: Language of the text, defaults to `en`. Used to choose the way to split the text into n-grams.
"""
if max_penalty > 0:
raise ValueError(f"max_penalty {max_penalty} should not be positive")
if language == "en":
def zipngram(text: str, ngram_size: int):
words = text.lower().split()
return zip(*[words[i:] for i in range(ngram_size)]), words
elif language == "zh":
from transformers.utils.import_utils import _is_package_available
if not _is_package_available("jieba"):
raise ValueError("Please install jieba to use Chinese language")
def zipngram(text: str, ngram_size: int):
import jieba
seg_list = list(jieba.cut(text))
return zip(*[seg_list[i:] for i in range(ngram_size)]), seg_list
else:
raise ValueError(
f"Word splitting for language `{language}` is not yet implemented. Please implement your own zip-ngram function."
)
def repetition_penalty_reward(completions, **kwargs) -> float:
"""
reward function the penalizes repetitions
ref implementation: https://github.com/eddycmu/demystify-long-cot/blob/release/openrlhf/openrlhf/reward/repetition.py
Args:
completions: List of model completions
"""
contents = [completion[0]["content"] for completion in completions]
rewards = []
for completion in contents:
if completion == "":
rewards.append(0.0)
continue
ngrams = set()
total = 0
ngram_array, words = zipngram(completion, ngram_size)
if len(words) < ngram_size:
rewards.append(0.0)
continue
for ng in ngram_array:
ngrams.add(ng)
total += 1
scaling = 1 - len(ngrams) / total
reward = scaling * max_penalty
rewards.append(reward)
return rewards
return repetition_penalty_reward