def _generate_examples()

in tensorflow_datasets/text_simplification/wiki_auto/wiki_auto.py [0:0]


  def _generate_examples(self, filepaths, split):
    """Yields `(key, example)` pairs for the active builder config.

    The input format depends on `self.builder_config.name`:

    * `'manual'`: a tab-separated file at `filepaths[split]`, one example per
      line.
    * `'auto_acl'`, `'auto_full_no_split'`, `'auto_full_with_split'`: two
      files, `filepaths['normal']` and `filepaths['simple']`, read in lockstep
      line by line.
    * any other name: a JSON file at `filepaths[split]` holding an object that
      maps example ids to article-pair records.

    Args:
      filepaths: Mapping indexed by `split` (manual / JSON configs) or by the
        keys `'normal'` and `'simple'` (auto configs); values are passed to
        `tf.io.gfile.GFile`.
      split: Key into `filepaths`. Not used by the auto configs.

    Yields:
      Tuples `(id_, example)` where `id_` is the zero-based position of the
      example in its input and `example` is a dict of features.
    """
    config_name = self.builder_config.name

    if config_name == 'manual':
      keys = (
          'alignment_label', 'simple_sentence_id', 'normal_sentence_id',
          'simple_sentence', 'normal_sentence', 'GLEU-score'
      )

      with tf.io.gfile.GFile(filepaths[split]) as f:
        for id_, line in enumerate(f):
          # zip() stops at the shorter sequence, so a line with fewer columns
          # than `keys` produces an example with correspondingly fewer keys.
          yield id_, dict(zip(keys, line.strip().split('\t')))

    elif config_name in ('auto_acl', 'auto_full_no_split',
                         'auto_full_with_split'):
      with tf.io.gfile.GFile(filepaths['normal']) as fi, \
          tf.io.gfile.GFile(filepaths['simple']) as fo:
        # Lines are paired by position and yielded exactly as iterated from
        # the files (no stripping is applied here). Iteration ends with the
        # shorter of the two files.
        for id_, (norm_se, simp_se) in enumerate(zip(fi, fo)):
          yield id_, {
              'normal_sentence': norm_se,
              'simple_sentence': simp_se,
          }

    else:
      # Parse inside a `with` block so the file handle is closed as soon as
      # the JSON is loaded, rather than being left open.
      with tf.io.gfile.GFile(filepaths[split]) as f:
        dataset_dict = json.load(f)

      for id_, (eid, example_dict) in enumerate(dataset_dict.items()):
        normal = example_dict['normal']
        simple = example_dict['simple']
        # `content` maps sentence id -> sentence text; keys and values are
        # emitted as two parallel lists in the mapping's iteration order.
        normal_content = normal['content']
        simple_content = simple['content']
        # Alignments are optional. Each entry unpacks as
        # (simple_id, normal_id).
        paragraph_pairs = example_dict.get('paragraph_alignment', [])
        sentence_pairs = example_dict.get('sentence_alignment', [])

        yield id_, {
            'example_id': eid,
            'normal': {
                'normal_article_id': normal['id'],
                'normal_article_title': normal['title'],
                'normal_article_url': normal['url'],
                'normal_article_content': {
                    'normal_sentence_id': list(normal_content),
                    'normal_sentence': list(normal_content.values()),
                },
            },
            'simple': {
                'simple_article_id': simple['id'],
                'simple_article_title': simple['title'],
                'simple_article_url': simple['url'],
                'simple_article_content': {
                    'simple_sentence_id': list(simple_content),
                    'simple_sentence': list(simple_content.values()),
                },
            },
            'paragraph_alignment': {
                'normal_paragraph_id': [
                    norm_id for _, norm_id in paragraph_pairs
                ],
                'simple_paragraph_id': [
                    simp_id for simp_id, _ in paragraph_pairs
                ],
            },
            'sentence_alignment': {
                'normal_sentence_id': [
                    norm_id for _, norm_id in sentence_pairs
                ],
                'simple_sentence_id': [
                    simp_id for simp_id, _ in sentence_pairs
                ],
            },
        }