in src/open_r1/rewards.py [0:0]
def get_reward_funcs(script_args) -> list[Callable]:
    """Return the reward callables named in ``script_args.reward_funcs``.

    A registry mapping every supported reward name to a ready-to-call function
    is built from the settings on ``script_args``; the requested names are then
    looked up in order.

    The whole registry is constructed on every call, so ``script_args`` must
    provide every attribute read below even when the corresponding reward is
    not requested. Only ``enforce_same_language`` and ``ioi_provider`` are
    optional (they fall back to ``False`` and ``"piston"`` respectively).

    Args:
        script_args: Configuration object exposing ``reward_funcs`` (an
            iterable of registry keys) and the per-reward settings read below.

    Returns:
        The selected reward functions, in the same order as
        ``script_args.reward_funcs``.

    Raises:
        KeyError: If any requested name is not a registry key. The message
            lists all unknown names together with the available ones.
    """
    # Parameterised rewards are bound with ``partial``; ``update_wrapper`` is
    # then applied with the original function as the wrapped object
    # (presumably functools' helpers, so the partial keeps the original's
    # metadata such as ``__name__`` -- confirm against the file's imports).
    REWARD_FUNCS_REGISTRY = {
        "accuracy": accuracy_reward,
        "format": format_reward,
        "reasoning_steps": reasoning_steps_reward,
        "cosine": get_cosine_scaled_reward(
            min_value_wrong=script_args.cosine_min_value_wrong,
            max_value_wrong=script_args.cosine_max_value_wrong,
            min_value_correct=script_args.cosine_min_value_correct,
            max_value_correct=script_args.cosine_max_value_correct,
            max_len=script_args.cosine_max_len,
        ),
        "repetition_penalty": get_repetition_penalty_reward(
            ngram_size=script_args.repetition_n_grams,
            max_penalty=script_args.repetition_max_penalty,
        ),
        "length": len_reward,
        "code": update_wrapper(
            partial(
                code_reward,
                num_parallel=script_args.parallel_code_exec_per_proc,
                provider_type=script_args.code_provider,
                enforce_same_language=getattr(script_args, "enforce_same_language", False),
            ),
            code_reward,
        ),
        "binary_code": update_wrapper(
            partial(
                binary_code_reward,
                num_parallel=script_args.parallel_code_exec_per_proc,
                provider_type=script_args.code_provider,
                enforce_same_language=getattr(script_args, "enforce_same_language", False),
            ),
            binary_code_reward,
        ),
        "ioi_code": update_wrapper(
            partial(
                ioi_code_reward,
                test_batch_size=script_args.code_eval_test_batch_size,
                provider_type=getattr(script_args, "ioi_provider", "piston"),
            ),
            ioi_code_reward,
        ),
        "cf_code": update_wrapper(
            partial(
                cf_code_reward,
                test_batch_size=script_args.code_eval_test_batch_size,
                scoring_mode=script_args.code_eval_scoring_mode,
            ),
            cf_code_reward,
        ),
        "code_format": get_code_format_reward(language=script_args.code_language),
        "tag_count": tag_count_reward,
        "soft_overlong_punishment": get_soft_overlong_punishment(
            max_completion_len=script_args.max_completion_len,
            soft_punish_cache=script_args.soft_punish_cache,
        ),
    }

    # Materialise once so the names can be validated and then looked up even
    # if ``reward_funcs`` is a one-shot iterable.
    requested = list(script_args.reward_funcs)

    # Report every misspelled/unsupported name at once instead of failing on
    # the first one with a bare ``KeyError: '<name>'``.
    unknown = [name for name in requested if name not in REWARD_FUNCS_REGISTRY]
    if unknown:
        raise KeyError(
            f"Unknown reward function(s): {unknown}. "
            f"Available reward functions: {sorted(REWARD_FUNCS_REGISTRY)}"
        )

    return [REWARD_FUNCS_REGISTRY[name] for name in requested]