in cached_classes.py [0:0]
def get_reward_model_scores(self, prompt: List[Dict[str, str]], completions: List[List[Dict[str, str]]]) -> List[float]:
    """Return the cached reward-model score for every completion of ``prompt``.

    Each score is looked up under the key
    ``(to_string(prompt), to_string(completion))``, first in
    ``self.train_data`` and, if absent there, in ``self.test_data``.

    Args:
        prompt: The prompt messages shared by all completions.
        completions: The candidate completions to look up.

    Returns:
        The cached values, one per completion, in the same order as
        ``completions``.

    Raises:
        ValueError: If a (prompt, completion) pair is found in neither cache.
    """
    # The prompt half of the key is the same for every completion, so build it
    # once instead of re-serializing the prompt on each iteration.
    prompt_key = to_string(prompt)

    completion_rewards = []
    for idx_c, completion in enumerate(completions):
        lookup_hash = (prompt_key, to_string(completion))
        # The train cache takes precedence over the test cache.
        if lookup_hash in self.train_data:
            completion_rewards.append(self.train_data[lookup_hash])
        elif lookup_hash in self.test_data:
            completion_rewards.append(self.test_data[lookup_hash])
        else:
            raise ValueError(f"No cached reward model score found for prompt: {prompt}, completion: {idx_c}, {completion}")
    return completion_rewards