in tensorflow_datasets/text/glue.py [0:0]
def _split_generators(self, dl_manager):
  """Builds the list of split generators for the active builder config.

  The layout depends on `self.builder_config.name`:

  * "ax": the file at `data_url` is downloaded (not extracted) and a single
    TEST split is returned.
  * "mrpc": three files (dev ids, train, test) are downloaded individually
    and handed to every split through the "mrpc_files" kwarg; there is no
    data directory in this case.
  * anything else: `data_url` is downloaded and extracted, and the split
    files are looked up under the config's `data_dir` inside it.

  Args:
    dl_manager: download manager providing `download` and
      `download_and_extract`.

  Returns:
    A list of split generators. "mnli" yields train plus matched and
    mismatched validation/test splits; "mnli_matched" and "mnli_mismatched"
    yield only validation and test; every other config yields
    train/validation/test.
  """
  config = self.builder_config
  task = config.name

  # The "ax" config is handled on its own: one downloaded file, one split.
  if task == "ax":
    ax_path = dl_manager.download(config.data_url)
    ax_test = tfds.core.SplitGenerator(
        name=tfds.Split.TEST,
        gen_kwargs={
            "data_file": ax_path,
            "split": "test",
        })
    return [ax_test]

  if task == "mrpc":
    # MRPC is fetched as separate files instead of a single archive.
    data_dir = None
    mrpc_files = dl_manager.download({
        "dev_ids": _MRPC_DEV_IDS,
        "train": _MRPC_TRAIN,
        "test": _MRPC_TEST,
    })
  else:
    extracted_root = dl_manager.download_and_extract(config.data_url)
    data_dir = os.path.join(extracted_root, config.data_dir)
    mrpc_files = None

  # With no data directory (MRPC) the file names are joined onto "".
  tsv_root = data_dir or ""

  def _tsv_split(split_name, split, filename):
    # One generator for a standard `<split>.tsv` file.
    return tfds.core.SplitGenerator(
        name=split_name,
        gen_kwargs={
            "data_file": os.path.join(tsv_root, filename),
            "split": split,
            "mrpc_files": mrpc_files,
        })

  train_split = _tsv_split(tfds.Split.TRAIN, "train", "train.tsv")

  if task == "mnli":
    # (split name, file prefix, matched) for each extra MNLI split.
    mnli_layout = (
        ("validation_matched", "dev", True),
        ("validation_mismatched", "dev", False),
        ("test_matched", "test", True),
        ("test_mismatched", "test", False),
    )
    return [train_split] + [
        _mnli_split_generator(split_name, data_dir, prefix, matched=flag)
        for split_name, prefix, flag in mnli_layout
    ]

  if task in ("mnli_matched", "mnli_mismatched"):
    # The single-variant MNLI configs expose no train split.
    is_matched = task == "mnli_matched"
    return [
        _mnli_split_generator("validation", data_dir, "dev", matched=is_matched),
        _mnli_split_generator("test", data_dir, "test", matched=is_matched),
    ]

  return [
      train_split,
      _tsv_split(tfds.Split.VALIDATION, "dev", "dev.tsv"),
      _tsv_split(tfds.Split.TEST, "test", "test.tsv"),
  ]