utils_nlp/models/transformers/question_answering.py [745:834]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    verbose_logging=False,
):
    """
    Postprocesses start and end logits
    generated by :meth:`AnswerExtractor.fit` for BERT.

    Args:
        results (list): List of :class:`QAResult`.
        examples_file (str): One of the files cached by :meth:`QAProcessor.preprocess`.
            This file contains the original document tokens that are used to generate
            the final answers from the predicted start and end positions.
        features_file (str): One of the files cached by :meth:`QAProcessor.preprocess`.
            This file contains the mapping from indices in the processed token list
            to the original document tokens that are used to generate the final
            predicted answers.
        do_lower_case (bool): Whether an uncased tokenizer was used during
            data preprocessing. This is required during answer finalization,
            when the predicted answer text is compared with the original
            text span in :func:`_get_final_text`.
        unanswerable_exists (bool, optional): Whether there are unanswerable
            questions in the data. If True, the start and end logits of the [CLS]
            token, which indicate the probability of the answer being empty,
            are included in the candidate answer list.
            Defaults to False.
        n_best_size (int, optional): The number of candidates to choose from each
            QAResult to generate the final prediction. It's also the maximum number
            of n-best answers to output for each question.
            Note that the number of n-best answers can be smaller than `n_best_size`
            because some unqualified answers, e.g. answers that are too long,
            are removed.
        max_answer_length (int, optional): Maximum length of the answer. Defaults to 30.
        output_prediction_file (str, optional): Path of the file to save the
            predicted answers. Defaults to "./qa_predictions.json".
        output_nbest_file (str, optional): Path of the file to save the n-best answers.
            Defaults to "./nbest_predictions.json".
        output_null_log_odds_file (str, optional): If unanswerable_exists is True,
            the score difference between the empty prediction and the best non-empty
            prediction is saved to this file. These scores can be used to find the
            best threshold for predicting an empty answer. Defaults to "./null_odds.json".
        null_score_diff_threshold (float, optional): If unanswerable_exists is True
            and the score difference between the empty prediction and the best
            non-empty prediction is higher than this threshold, the final predicted
            answer is empty.
            Defaults to 0.0.
        verbose_logging (bool, optional): Whether to log details of
            answer postprocessing. Defaults to False.

    Returns:
        tuple: (OrderedDict, OrderedDict, OrderedDict)
            The keys of all three dictionaries are the `qa_id` values in the original
            :class:`utils_nlp.dataset.pytorch.QADataset`.
            The values of the first dictionary are the predicted answer texts
            in string type. The values of the second dictionary are the softmax
            probabilities of the predicted answers.
            The values of the third dictionary are the n-best answers for each qa_id.
            Note that the number of n-best answers can be smaller than `n_best_size`
            because some unqualified answers, e.g. answers that are too long,
            are removed.

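    Example:
        A minimal usage sketch. The function and variable names below are
        illustrative, and the file paths are placeholders; ``qa_results``
        stands for the list of :class:`QAResult` produced by the answer
        extractor, and the two cached files come from
        :meth:`QAProcessor.preprocess`::

            answer_dict, prob_dict, nbest_dict = postprocess_bert_answer(
                results=qa_results,
                examples_file="./cached_examples_test.jsonl",
                features_file="./cached_features_test.jsonl",
                do_lower_case=True,
            )
            # answer_dict maps each qa_id to its final predicted answer text
            for qa_id, answer in answer_dict.items():
                print(qa_id, answer)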
    """
    with jsonlines.open(examples_file) as reader:
        examples_all = list(reader.iter())

    with jsonlines.open(features_file) as reader:
        features_all = list(reader.iter())

    qa_id_to_features = collections.defaultdict(list)
    # Map unique features to the original doc-question-answer triplet
    # Each doc-question-answer triplet can have multiple features because the doc
    # could be split into multiple spans
    for f in features_all:
        qa_id_to_features[f["qa_id"]].append(f)

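    # Index each model output (QAResult) by its feature's unique_id for lookup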
    unique_id_to_result = {}
    for r in results:
        unique_id_to_result[r.unique_id] = r

    all_predictions = collections.OrderedDict()
    all_probs = collections.OrderedDict()
    all_nbest_json = collections.OrderedDict()
    scores_diff_json = collections.OrderedDict()

    for example in examples_all:
        # get all the features belonging to the same example,
        # i.e. paragraph/question pair.
        features = qa_id_to_features[example["qa_id"]]

        prelim_predictions = []
        # keep track of the minimum score of null start+end of position 0
        score_null = 1000000  # large and positive
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



utils_nlp/models/transformers/question_answering.py [1042:1125]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    verbose_logging=False,
):
    """
    Postprocesses start and end logits generated by :meth:`AnswerExtractor.fit`
    for XLNet.

    Args:
        results (list): List of :class:`QAResultExtended`.
        examples_file (str): One of the files cached by :meth:`QAProcessor.preprocess`.
            This file contains the original document tokens that are used to generate
            the final answers from the predicted start and end positions.
        features_file (str): One of the files cached by :meth:`QAProcessor.preprocess`.
            This file contains the mapping from indices in the processed token list to
            the original document tokens that are used to generate the final
            predicted answers.
        tokenizer (XLNetTokenizer): Tokenizer used during data preprocessing.
        n_best_size (int, optional): The number of candidates to choose from each
            QAResult to generate the final prediction. It's also the maximum number
            of n-best answers to output for each question.
            Note that the number of n-best answers can be smaller than `n_best_size`
            because some unqualified answers, e.g. answers that are too long,
            are removed.
        n_top_start (int, optional): Beam size for span start. Note that this needs to
            be consistent with the XLNet model configuration. Defaults to 5.
        n_top_end (int, optional): Beam size for span end. Note that this needs to be
            consistent with the XLNet model configuration. Defaults to 5.
        max_answer_length (int, optional): Maximum length of the answer. Defaults to 30.
        unanswerable_exists (bool, optional): Whether there are unanswerable questions
            in the data. If True, the start and end logits of the [CLS] token, which
            indicate the probability of the answer being empty, are included in the
            candidate answer list.
            Defaults to False.
        output_prediction_file (str, optional): Path of the file to save the
            predicted answers. Defaults to "./qa_predictions.json".
        output_nbest_file (str, optional): Path of the file to save the n-best answers.
            Defaults to "./nbest_predictions.json".
        output_null_log_odds_file (str, optional): If unanswerable_exists is True,
            the score difference between the empty prediction and the best non-empty
            prediction is saved to this file. These scores can be used to find the
            best threshold for predicting an empty answer. Defaults to "./null_odds.json".
        verbose_logging (bool, optional): Whether to log details of answer
            postprocessing. Defaults to False.

    Returns:
        tuple: (OrderedDict, OrderedDict, OrderedDict)
            The keys of all three dictionaries are the `qa_id` values in the original
            :class:`utils_nlp.dataset.pytorch.QADataset`.
            The values of the first dictionary are the predicted answer texts
            in string type. The values of the second dictionary are the softmax
            probabilities of the predicted answers.
            The values of the third dictionary are the n-best answers for each qa_id.
            Note that the number of n-best answers can be smaller than `n_best_size`
            because some unqualified answers, e.g. answers that are too long,
            are removed.

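    Example:
        A minimal usage sketch. The function and variable names below are
        illustrative, and the file paths are placeholders; ``qa_results``
        stands for the list of :class:`QAResultExtended` produced by the
        answer extractor, ``xlnet_tokenizer`` is the tokenizer used during
        preprocessing, and the two cached files come from
        :meth:`QAProcessor.preprocess`::

            answer_dict, prob_dict, nbest_dict = postprocess_xlnet_answer(
                results=qa_results,
                examples_file="./cached_examples_test.jsonl",
                features_file="./cached_features_test.jsonl",
                tokenizer=xlnet_tokenizer,
                n_top_start=5,
                n_top_end=5,
            )
            # nbest_dict maps each qa_id to its list of n-best candidate answers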
    """
    with jsonlines.open(examples_file) as reader:
        examples_all = list(reader.iter())

    with jsonlines.open(features_file) as reader:
        features_all = list(reader.iter())

    qa_id_to_features = collections.defaultdict(list)
    # Map unique features to the original doc-question-answer triplet
    # Each doc-question-answer triplet can have multiple features because the doc
    # could be split into multiple spans
    for f in features_all:
        qa_id_to_features[f["qa_id"]].append(f)

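    # Index each model output (QAResultExtended) by its feature's unique_id for lookup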
    unique_id_to_result = {}
    for r in results:
        unique_id_to_result[r.unique_id] = r

    all_predictions = collections.OrderedDict()
    all_probs = collections.OrderedDict()
    all_nbest_json = collections.OrderedDict()
    scores_diff_json = collections.OrderedDict()

    for example in examples_all:
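        # get all the features belonging to the same example,
        # i.e. paragraph/question pair.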
        features = qa_id_to_features[example["qa_id"]]

        prelim_predictions = []
        # keep track of the minimum score of null start+end of position 0
        score_null = 1000000  # large and positive
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



