in tensorflow_datasets/summarization/wikihow.py [0:0]
def _generate_examples(self, path=None, title_set=None):
    """Yields `(index, example)` pairs read from a WikiHow CSV file.

    The first CSV row is treated as the header. For the "all" and "sep"
    builder configs the header must match the expected column list exactly,
    otherwise a `ValueError` is raised. Every following row is kept only if:

    * it has as many fields as there are header columns,
    * both the summary and document fields are still truthy after being
      stripped and passed through `_filter_and_clean`, and
    * its title, stripped and with spaces removed, is in `title_set`.

    Args:
      path: Location of the CSV file; handed to `tf.io.gfile.GFile`.
      title_set: Container of normalized titles (spaces removed) used to
        decide which rows are emitted. Membership is tested with `in`.
        NOTE(review): the `None` default does not support the `in` test
        below, so callers appear to be expected to always pass a real
        container -- confirm against the call sites.

    Yields:
      Tuples `(i, example)`, where `i` is the zero-based position of the row
      among the rows after the header (skipped rows still consume an index)
      and `example` maps every header column to its stripped value, with the
      `_DOCUMENT` and `_SUMMARY` entries replaced by their cleaned versions.

    Raises:
      ValueError: If the header does not match the active builder config.
    """
    # (config name, required header, error message), checked in this order.
    header_specs = (
        (
            "all",
            ["headline", "title", "text"],
            "Mismatched header in WikiAll.txt",
        ),
        (
            "sep",
            ["overview", "headline", "text", "sectionLabel", "title"],
            "Mismatched header in WikiSep.txt",
        ),
    )
    with tf.io.gfile.GFile(path) as csv_file:
        rows = csv.reader(csv_file)
        headers = next(rows)

        config_name = self.builder_config.name
        for expected_name, expected_columns, message in header_specs:
            if config_name == expected_name and headers != expected_columns:
                raise ValueError(message)

        # Column name -> position within a row.
        column_index = {}
        for position, column in enumerate(headers):
            column_index[column] = position

        for i, row in enumerate(rows):
            # Skip empty rows and rows with a missing or surplus field.
            if len(row) != len(column_index):
                continue

            raw_summary = row[column_index[_SUMMARY]].strip()
            raw_document = row[column_index[_DOCUMENT]].strip()
            summary, document = _filter_and_clean(raw_summary, raw_document)
            if not summary or not document:
                continue

            # Titles are compared with surrounding whitespace and all
            # spaces removed.
            normalized_title = row[column_index["title"]].strip().replace(" ", "")
            if normalized_title not in title_set:
                continue

            # Copy the passthrough columns first, then add the cleaned
            # document and summary.
            example = {}
            for column, position in column_index.items():
                if column in (_SUMMARY, _DOCUMENT):
                    continue
                example[column] = row[position].strip()
            example[_DOCUMENT] = document
            example[_SUMMARY] = summary
            yield i, example