in ml/eval/reward_eval.py [0:0]
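import torch

# evaluate_data() below depends on get_reward_output_fn, whose definition is
# not shown in this excerpt. This is only an illustrative sketch of its likely
# shape, assuming `reward_output_fmt` selects how a scalar reward is read off
# the logits ("mean" vs. the first logit); the exact format strings are an
# assumption, not taken from this repo, and the real implementation lives
# elsewhere in this file.
def get_reward_output_fn(reward_output_fmt: str, apply_sigmoid_to_reward: bool):
    def reward_output_fn(logits):
        # logits: (batch_size, num_labels) tensor from the reward model head.
        scores = logits.mean(dim=-1) if reward_output_fmt == "mean" else logits[:, 0]
        if apply_sigmoid_to_reward:
            scores = torch.sigmoid(scores)
        # Return plain floats so downstream serialization stays simple.
        return scores.tolist()
    return reward_output_fn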
from typing import Any, Dict, List

import torch
from tqdm import tqdm


def evaluate_data(args, model, tokenizer, eval_data_list_dict) -> List[Dict[str, Any]]:
    """
    Score every example in the dataset with the reward model and attach the
    result under a 'reward' key, in place.
    """
    reward_output_fn = get_reward_output_fn(args.reward_output_fmt, args.apply_sigmoid_to_reward)
    pbar = tqdm(total=len(eval_data_list_dict), desc="Evaluating Rewards")
    rewards_list = []
    for idx in range(0, len(eval_data_list_dict), args.per_device_batch_size):
        batch_list_dict = eval_data_list_dict[idx:idx + args.per_device_batch_size]
        # Build prompt-response pairs; fall back to an instruction template
        # when the examples carry no explicit 'prompt' field
        if 'prompt' in batch_list_dict[0]:
            batch_full_outputs = [f"{l['prompt']} {l['output']}" for l in batch_list_dict]
        else:
            batch_full_outputs = [
                f"Below is an instruction: {l['instruction']} Response: {l['output']}"
                for l in batch_list_dict
            ]
        # Tokenize the full responses and move them to the model's device
        encoded_full_responses = tokenizer(
            batch_full_outputs, return_tensors="pt", padding=True, truncation=True
        ).to(model.device)
        # Generate rewards
        with torch.inference_mode():
            reward_outputs = model(**encoded_full_responses)
            rewards = reward_output_fn(reward_outputs.logits)
        rewards_list.extend(rewards)
        pbar.update(len(batch_list_dict))
    pbar.close()
    # Attach the reward scores to the original examples
    for i, data in enumerate(eval_data_list_dict):
        data['reward'] = rewards_list[i]
    return eval_data_list_dict
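
# A sketch of how evaluate_data() might be driven end to end; the real CLI /
# argument parsing for this script is not shown in this excerpt, so the
# attribute names below simply mirror what the function reads off `args`, and
# the model path and argument values are placeholders. Assumes the reward
# model is a standard sequence-classification checkpoint.
if __name__ == "__main__":
    from argparse import Namespace
    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    args = Namespace(
        reward_output_fmt="mean",        # assumed format string (see sketch above)
        apply_sigmoid_to_reward=False,
        per_device_batch_size=8,
    )
    model = AutoModelForSequenceClassification.from_pretrained("path/to/reward-model").eval()
    tokenizer = AutoTokenizer.from_pretrained("path/to/reward-model")

    eval_data = [{"prompt": "What is 2 + 2?", "output": "4"}]
    scored = evaluate_data(args, model, tokenizer, eval_data)
    print(scored[0]["reward"])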