def _compute_answer_start()

in distant_supervision/synthetic_data_creator.py [0:0]


    def _compute_answer_start(self, *, answer_str, es_query, context):
        """
        Compute the character offsets in ``context`` at which ``answer_str`` starts.

        The offsets that ``find_all`` reports for ``answer_str`` inside ``es_query``
        are added to the offsets it reports for ``es_query`` inside ``context``.
        Every resulting offset is then re-checked by slicing ``context`` directly.

        :param answer_str: answer text looked up inside ``es_query``
        :param es_query: text looked up inside ``context``
        :param context: text that the returned offsets index into
        :return: list of offsets into ``context``, grouped by ``es_query`` occurrence
        :raises DsDatasetCreationError: when ``answer_str`` is not found in ``es_query``,
            when ``es_query`` is not found in ``context``, or when a computed offset
            does not point at ``answer_str`` in ``context``
        """
        answer_offsets_in_query = find_all(es_query, answer_str)
        if not answer_offsets_in_query:
            not_in_query_msg = 'Cannot find start position for answer="{}" in es_query="{}"'.format(
                answer_str, es_query)
            raise DsDatasetCreationError(not_in_query_msg)

        # es_query is expected to occur only once in context, but every occurrence is handled
        query_offsets_in_context = find_all(context, es_query)
        if not query_offsets_in_context:
            not_in_context_msg = 'Cannot find es_query="{}" in the following:\n{}'.format(
                es_query, context)
            raise DsDatasetCreationError(not_in_context_msg)

        # one candidate per (es_query occurrence, answer occurrence) pair, outer loop first
        candidate_offsets = [
            answer_offset + query_offset
            for query_offset in query_offsets_in_context
            for answer_offset in answer_offsets_in_query
        ]

        for candidate in candidate_offsets:
            # sanity check: the slice at each candidate must reproduce the answer exactly
            if context[candidate:candidate + len(answer_str)] == answer_str:
                continue

            diagnostics = {
                'start_pos': candidate,
                'sentence_start_pos_lst': query_offsets_in_context,
                'within_sent_start_pos_lst': answer_offsets_in_query,
                'answer_str': answer_str,
                'es_query': es_query,
                'context': context,
            }
            raise DsDatasetCreationError(
                'inconsistent start_pos found {}'.format(str(diagnostics)))

        return candidate_offsets