def _generate_examples()

in tensorflow_datasets/summarization/wikihow.py [0:0]


  def _generate_examples(self, path=None, title_set=None):
    """Yields `(row_index, example)` pairs read from the CSV file at `path`.

    Args:
      path: Location of the CSV file; it is opened with `tf.io.gfile.GFile`.
      title_set: Container of titles with all spaces removed. A row is only
        emitted when its stripped, space-free title is a member.
        NOTE(review): the `None` default does not support `in`, so callers
        presumably always pass a real collection -- confirm.

    Yields:
      Tuples of the row's position among the data rows (header excluded,
      skipped rows still counted) and a dict mapping every header name to its
      stripped cell, with the summary and document cells replaced by the
      values returned from `_filter_and_clean`.

    Raises:
      ValueError: If the builder config is named "all" or "sep" and the header
        row differs from the column list expected for that config.
    """
    with tf.io.gfile.GFile(path) as csv_file:
      rows = csv.reader(csv_file)
      header = next(rows)

      # Only the two known configs have a fixed header layout to validate.
      config_name = self.builder_config.name
      if config_name == "all" and header != ["headline", "title", "text"]:
        raise ValueError("Mismatched header in WikiAll.txt")
      sep_columns = ["overview", "headline", "text", "sectionLabel", "title"]
      if config_name == "sep" and header != sep_columns:
        raise ValueError("Mismatched header in WikiSep.txt")

      # Header name -> column position (a repeated name keeps its last index).
      column_of = dict(zip(header, range(len(header))))
      num_columns = len(column_of)
      # Columns copied through verbatim; summary/document are set separately.
      passthrough = [
          (name, col)
          for name, col in column_of.items()
          if name != _SUMMARY and name != _DOCUMENT
      ]

      for row_index, row in enumerate(rows):
        # Blank or malformed rows cannot be mapped onto the header; drop them.
        if len(row) != num_columns:
          continue

        raw_summary = row[column_of[_SUMMARY]].strip()
        raw_document = row[column_of[_DOCUMENT]].strip()
        summary, document = _filter_and_clean(raw_summary, raw_document)
        # A falsy summary or document after cleaning means the row is dropped.
        if not (summary and document):
          continue

        # Membership is tested on the title with every space removed.
        compact_title = row[column_of["title"]].strip().replace(" ", "")
        if compact_title not in title_set:
          continue

        example = {name: row[col].strip() for name, col in passthrough}
        example[_DOCUMENT] = document
        example[_SUMMARY] = summary
        yield row_index, example