def make_step_rewards()

in src/sal/models/reward_models.py [0:0]


    def make_step_rewards(logits, token_masks):
        """Return, per sample, the class-1 probability at every unmasked token.

        Args:
            logits: tensor indexed as (bs, seq_len, num_labels); softmax is
                taken over the last dimension.
            token_masks: tensor indexed as (bs, seq_len); a position is kept
                when its mask value is non-zero. Presumably a 0/1 or boolean
                mask marking step-separator tokens -- confirm against caller.

        Returns:
            A list with one entry per sample; each entry is a list of Python
            floats holding the probability of label index 1 at each kept
            position, in sequence order.
        """
        # Multiplying by the mask is kept so returned values match the
        # previous implementation for any non-binary mask values.
        probabilities = F.softmax(logits, dim=-1) * token_masks.unsqueeze(-1)

        # Select rows with the mask itself rather than by testing
        # `probabilities != 0`: softmax can underflow to exactly 0.0 for a
        # valid token, which previously dropped that element and broke (or
        # silently misaligned) the reshape into (valid_tokens, num_labels).
        keep = token_masks != 0

        return [
            sample[sample_keep][:, 1].cpu().tolist()  # valid_tokens
            for sample, sample_keep in zip(probabilities, keep)
        ]