def single_logprob_parser()

in src/alpaca_eval/completion_parsers.py [0:0]


    def single_logprob_parser(top_logprobs: list[dict[str, Any]]) -> float:
        """Convert the top-logprobs of a single generated token into a preference score.

        Reads ``numerator_token``, ``denominator_tokens``, ``is_binarize`` and ``completion`` from the
        enclosing scope (``completion`` is only used in the warning message).

        Args:
            top_logprobs: Candidate entries; each must provide a ``"token"`` and a ``"logprob"`` key.
                If the same token appears more than once, the last entry wins.

        Returns:
            * ``np.nan`` when neither ``numerator_token`` nor any of ``denominator_tokens`` appears in
              ``top_logprobs`` (a warning is logged).
            * If ``is_binarize``: ``1`` when the logprob of ``numerator_token`` is strictly greater than
              that of every other denominator token, otherwise ``2``.
            * Otherwise ``2 - p`` with ``p = exp(logprob(numerator_token) - logsumexp(denominator logprobs))``.
              When ``numerator_token`` is one of ``denominator_tokens`` this lies in ``[1, 2]``.
        """
        # Loop-invariant: build the list of tokens we care about once, not once per candidate entry.
        tokens_of_interest = denominator_tokens + [numerator_token]
        map_tokens_to_logprobs = {
            t["token"]: t["logprob"] for t in top_logprobs if t["token"] in tokens_of_interest
        }

        if not map_tokens_to_logprobs:
            logging.warning(f"Cannot find any logprobs from {tokens_of_interest} in {completion}.")
            return np.nan

        # A token that is absent is given probability 0 (logprob -inf). This only makes sense because
        # at least one token of interest is known to be present at this point.
        missing = float("-inf")
        baseline_logprob = map_tokens_to_logprobs.get(numerator_token, missing)

        if is_binarize:
            # Binary case: is the baseline (numerator) token strictly more likely than every other
            # denominator token? The generator lets all() stop at the first counter-example.
            competitor_logprobs = (
                map_tokens_to_logprobs.get(t, missing) for t in denominator_tokens if t != numerator_token
            )
            return 1 if all(baseline_logprob > logprob for logprob in competitor_logprobs) else 2

        # Only the probability path needs the normaliser, so compute it here rather than unconditionally.
        denominator_logprob = logsumexp([map_tokens_to_logprobs.get(t, missing) for t in denominator_tokens])
        probability = np.exp(baseline_logprob - denominator_logprob)
        # Probability 1 of preferring the first maps to 1, probability 0 maps to 2.
        # The output is between 1 and 2 for historical reasons.
        return 2 - probability