in ml/eval/reward_eval.py [0:0]
import torch

def get_reward_output_fn(reward_output_format: str, sigmoid: bool):
    """Build a function that maps raw reward-model logits to Python floats."""
    def softmax_probs(x):
        # Class probabilities over the last dimension, as a numpy array.
        return x.squeeze().cpu().detach().softmax(dim=-1).numpy()

    def default(x):
        # No softmax: return the raw squeezed logits as a list of floats.
        return x.squeeze().cpu().detach().numpy().tolist()

    reward_fn_map = {
        '0': lambda x: softmax_probs(x)[0].tolist(),    # P(class 0)
        '1': lambda x: softmax_probs(x)[1].tolist(),    # P(class 1)
        '1-0': lambda x: (softmax_probs(x)[1] - softmax_probs(x)[0]).tolist(),  # P(1) - P(0)
    }
    reward_output_fn = reward_fn_map.get(reward_output_format, default)
    if sigmoid:
        # Sigmoid of the raw logits; takes precedence over the formats above.
        # Detach and move to CPU first so .numpy() works on CUDA/grad tensors.
        return lambda x: torch.sigmoid(x.squeeze().cpu().detach()).numpy().tolist()
    return reward_output_fn
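
# Usage sketch (illustrative; the variable names and tensor shape below are
# assumptions, not taken from the original file): a two-class reward head
# emitting logits of shape [1, 2].
if __name__ == '__main__':
    logits = torch.tensor([[0.2, 1.3]])
    margin_fn = get_reward_output_fn('1-0', sigmoid=False)
    print(margin_fn(logits))  # P(class 1) - P(class 0), a single float
    raw_fn = get_reward_output_fn('raw', sigmoid=True)  # unknown format -> sigmoid path
    print(raw_fn(logits))     # sigmoid of each raw logit, a list of two floats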