in src/sal/models/reward_models.py [0:0]
def make_step_rewards(logits, token_masks):
    """Extract the class-1 ("positive") probability at every unmasked token.

    Args:
        logits: Per-token classification logits. Assumed to be shaped
            (bs, seq_len, num_labels) with the positive label at index 1
            -- TODO confirm against the caller.
        token_masks: Per-token mask, assumed to be shaped (bs, seq_len).
            Positions whose mask value is non-zero are kept; all others are
            dropped from the output.

    Returns:
        A list with one entry per batch element. Each entry is a plain
        Python list of floats holding the (mask-scaled) probability of
        label 1 at every kept position, in sequence order. A sample with no
        kept positions yields an empty list.
    """
    probabilities = F.softmax(logits, dim=-1)
    # Keep the multiplication so a non-binary mask still scales the scores
    # exactly as before.
    probabilities = probabilities * token_masks.unsqueeze(-1)  # bs, seq_len, num_labels

    # Select rows by the mask itself rather than by `probabilities != 0`:
    # a softmax output can underflow to exactly 0.0 for a saturated logit,
    # and filtering on the value would then drop that element and break the
    # pairing of (negative, positive) probabilities.
    keep = token_masks != 0  # bs, seq_len

    all_scores_res = []
    for sample, sample_keep in zip(probabilities, keep):
        valid_rows = sample[sample_keep]  # valid_tokens, num_labels
        positive_probs = valid_rows[:, 1]  # valid_tokens
        all_scores_res.append(positive_probs.cpu().tolist())
    return all_scores_res