def get_reward_output_fn()

in ml/eval/reward_eval.py


import torch

def get_reward_output_fn(reward_output_format: str, sigmoid: bool):
    """Return a function that converts raw reward-model logits into a Python list of scores."""
    def default(x):
        # Fallback: return the raw (squeezed) logits as a plain Python list.
        return x.squeeze().cpu().detach().numpy().tolist()
    reward_fn_map = {
        # Softmax probability of class 0.
        '0': lambda x: x.squeeze().cpu().detach().softmax(dim=-1).numpy()[0].tolist(),
        # Softmax probability of class 1.
        '1': lambda x: x.squeeze().cpu().detach().softmax(dim=-1).numpy()[1].tolist(),
        # Margin between the class-1 and class-0 softmax probabilities.
        '1-0': lambda x: (x.squeeze().cpu().detach().softmax(dim=-1).numpy()[1]
                          - x.squeeze().cpu().detach().softmax(dim=-1).numpy()[0]).tolist(),
    }
    reward_output_fn = reward_fn_map.get(reward_output_format, default)
    if sigmoid:
        # The sigmoid option overrides the format-specific function selected above.
        return lambda x: torch.sigmoid(torch.tensor(x)).numpy().tolist()
    return reward_output_fn
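
Below is a minimal, hypothetical usage sketch: it assumes the reward model emits a 2-class logits tensor for a single example, and uses reward_output_format='1' so the returned function yields the softmax probability of class 1. The variable names and logit values are illustrative only, not part of the original module.

import torch

# Stand-in for a reward model's raw output: logits for classes [0, 1].
logits = torch.tensor([0.3, 1.2])

# Select the "probability of class 1" conversion; sigmoid is disabled.
reward_fn = get_reward_output_fn('1', sigmoid=False)

score = reward_fn(logits)  # float: softmax probability assigned to class 1
print(score)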