def _construct_dataset_sample()

in distant_supervision/synthetic_data_creator.py [0:0]


    def _construct_dataset_sample(self, *, qpa, hit, hit_phrases, es_query, es_rank, backfill_article, backfill_sent, rng):
        """Assemble one ``DsDatum`` from an answer article and a retrieved hit.

        The context is ``qpa.article_raw`` and the answer text is
        ``qpa.phrase.phrase_str``. Answer positions come from
        ``self._compute_answer_start`` and the questions from
        ``self._make_styled_questions``.

        Args:
            qpa: Object exposing ``article_raw``, ``article_id``,
                ``article_title`` and ``phrase`` (with ``phrase_str`` and
                ``phrase_category``).
            hit: Mapping with ``'_score'`` and ``'_source'`` entries; the
                latter provides ``'article_id'`` and ``'article_title'``.
                Presumably an Elasticsearch hit -- confirm against caller.
            hit_phrases: Stored unchanged under ``meta["question"]["phrases"]``.
            es_query: Forwarded to ``_compute_answer_start`` and recorded in
                the metadata.
            es_rank: Recorded in the metadata.
            backfill_article: Recorded as ``meta["backfill_nb_articles"]``.
            backfill_sent: Recorded as ``meta["backfill_nb_sents"]``.
            rng: Forwarded to ``_make_styled_questions``.

        Returns:
            The new ``DsDatum`` with its ``meta`` attribute populated.

        Raises:
            DsDatasetCreationError: If ``_compute_answer_start`` yields no
                start positions.
        """
        sample_id = random_str(16)

        passage = qpa.article_raw
        answer_text = qpa.phrase.phrase_str

        start_offsets = self._compute_answer_start(
            answer_str=answer_text, es_query=es_query, context=passage)

        # Guard clause: without at least one answer position there is
        # nothing to build a sample from.
        if len(start_offsets) == 0:
            error_template = 'Did not find any answer_start answer={}: {}\n{}'
            raise DsDatasetCreationError(
                error_template.format(answer_text, es_query, passage))

        # One answer entry per start position, all sharing the same text.
        answer_entries = [
            {'text': answer_text, 'answer_start': offset}
            for offset in start_offsets
        ]

        question_variants = self._make_styled_questions(
            qpa=qpa, es_hit=hit, answer_str=answer_text, rng=rng)

        sample = DsDatum(
            qid=sample_id,
            styled_questions=question_variants,
            answers=answer_entries,
            context=passage)

        # Provenance metadata; keys are added in a fixed order so the
        # resulting dict layout stays stable.
        provenance = {
            "es_query": es_query,
            "es_rank": es_rank,
            "es_score": hit['_score'],
            "answer_phrase_category": qpa.phrase.phrase_category,
        }
        # Article the context (and therefore the answer) was taken from.
        provenance["context"] = {
            "article_id": qpa.article_id,
            "article_title": qpa.article_title,
        }
        # Article the retrieved hit (question side) belongs to.
        hit_source = hit['_source']
        provenance["question"] = {
            "article_id": int(hit_source['article_id']),
            "article_title": hit_source['article_title'],
            "phrases": hit_phrases,
        }
        provenance["backfill_nb_articles"] = backfill_article
        provenance["backfill_nb_sents"] = backfill_sent

        sample.meta = provenance
        return sample