in access/resources/prepare.py [0:0]
def prepare_wikilarge():
    """Download the WikiLarge archive and copy its files into the local dataset layout.

    For every phase in ``PHASES`` and for both sides of the corpus, the single
    ``*.ori.<phase>.<src|dst>`` file found in the extracted archive is copied to
    the path given by ``get_data_filepath`` (``src`` is stored as ``complex``,
    ``dst`` as ``simple``). Each copied file is then passed through
    ``replace_lrb_rrb_file`` and ``add_newline_at_end_of_file``.

    Returns:
        The dataset name, ``'wikilarge'``.

    Raises:
        AssertionError: If a phase/side pattern does not match exactly one
            file in the extracted archive.
    """
    dataset = 'wikilarge'
    # NOTE(review): presumably the body is skipped when the dataset directory
    # already exists -- confirm against create_directory_or_skip.
    with create_directory_or_skip(get_dataset_dir(dataset)):
        url = 'https://github.com/louismartin/dress-data/raw/master/data-simplification.tar.bz2'
        extracted_path = download_and_extract(url)[0]
        # Only rename files and put them in local directory architecture
        for phase in PHASES:
            for (old_language_name, new_language_name) in [('src', 'complex'), ('dst', 'simple')]:
                old_path_glob = os.path.join(extracted_path, dataset, f'*.ori.{phase}.{old_language_name}')
                globs = glob(old_path_glob)
                # Raised explicitly instead of via `assert` so the check still
                # runs under `python -O`; the exception type is kept unchanged.
                if len(globs) != 1:
                    raise AssertionError(
                        f'Expected exactly one file matching {old_path_glob!r}, found {len(globs)}: {globs}'
                    )
                old_path = globs[0]
                new_path = get_data_filepath(dataset, phase, new_language_name)
                shutil.copyfile(old_path, new_path)
                # Presumably replace_lrb_rrb_file returns the path of a processed
                # copy; it is moved over new_path so the processed content wins.
                shutil.move(replace_lrb_rrb_file(new_path), new_path)
                add_newline_at_end_of_file(new_path)
    return dataset