in tensorflow_datasets/text_simplification/wiki_auto/wiki_auto.py [0:0]
def _info(self) -> tfds.core.DatasetInfo:
    """Builds the dataset metadata for the currently selected builder config.

    The feature schema is picked from `self.builder_config.name`:

    * `'manual'`: one sentence pair per example, with both sentence ids, a
      three-class `alignment_label` and a float64 `GLEU-score`.
    * `'auto_acl'`, `'auto_full_no_split'`, `'auto_full_with_split'`: only
      the `normal_sentence` / `simple_sentence` text pair.
    * any other name: an article-level record holding the normal and simple
      articles (id, title, url, sentence sequence) together with
      paragraph-level and sentence-level alignment id pairs.

    Returns:
      A `tfds.core.DatasetInfo` carrying the selected features, the module
      description and citation, and the project homepage. No supervised
      keys are declared.
    """
    config_name = self.builder_config.name
    # Configs that expose nothing but the aligned sentence texts.
    sentence_pair_configs = (
        'auto_acl',
        'auto_full_no_split',
        'auto_full_with_split',
    )

    if config_name == 'manual':
        alignment_label = tfds.features.ClassLabel(
            names=['notAligned', 'aligned', 'partialAligned'])
        schema = {
            'alignment_label': alignment_label,
            'normal_sentence_id': tfds.features.Text(),
            'simple_sentence_id': tfds.features.Text(),
            'normal_sentence': tfds.features.Text(),
            'simple_sentence': tfds.features.Text(),
            'GLEU-score': tf.float64,
        }
    elif config_name in sentence_pair_configs:
        schema = {
            'normal_sentence': tfds.features.Text(),
            'simple_sentence': tfds.features.Text(),
        }
    else:
        # Article-level layout: each side of the pair is a nested dict whose
        # content is a sequence of (sentence id, sentence text) entries.
        normal_article = {
            'normal_article_id': tf.int32,
            'normal_article_title': tfds.features.Text(),
            'normal_article_url': tfds.features.Text(),
            'normal_article_content': tfds.features.Sequence({
                'normal_sentence_id': tfds.features.Text(),
                'normal_sentence': tfds.features.Text(),
            }),
        }
        simple_article = {
            'simple_article_id': tf.int32,
            'simple_article_title': tfds.features.Text(),
            'simple_article_url': tfds.features.Text(),
            'simple_article_content': tfds.features.Sequence({
                'simple_sentence_id': tfds.features.Text(),
                'simple_sentence': tfds.features.Text(),
            }),
        }
        paragraph_alignment = tfds.features.Sequence({
            'normal_paragraph_id': tfds.features.Text(),
            'simple_paragraph_id': tfds.features.Text(),
        })
        sentence_alignment = tfds.features.Sequence({
            'normal_sentence_id': tfds.features.Text(),
            'simple_sentence_id': tfds.features.Text(),
        })
        schema = {
            'example_id': tfds.features.Text(),
            'normal': normal_article,
            'simple': simple_article,
            'paragraph_alignment': paragraph_alignment,
            'sentence_alignment': sentence_alignment,
        }

    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict(schema),
        supervised_keys=None,
        homepage='https://github.com/chaojiang06/wiki-auto',
        citation=_CITATION,
    )