def prepare_turkcorpus_lower()

in access/resources/prepare.py [0:0]


def _install_turkcorpus_file(old_path, new_path):
    """Copy one upstream file to ``new_path`` and post-process it in place."""
    shutil.copyfile(old_path, new_path)
    add_newline_at_end_of_file(new_path)
    # replace_lrb_rrb_file returns the path of a rewritten file, which then
    # replaces the copy we just made.
    # NOTE(review): presumably it rewrites -LRB-/-RRB- bracket tokens; confirm
    # against the helper's definition.
    shutil.move(replace_lrb_rrb_file(new_path), new_path)


def prepare_turkcorpus_lower():
    """Fetch TurkCorpus from GitHub and install it as the 'turkcorpus_lower' dataset.

    The upstream repository is cloned into a temporary directory. For the
    upstream 'test' and 'tune' phases (stored locally as 'test' and 'valid'),
    the 'norm' and 'simp' files plus the eight 'turk.N' files are copied to
    the paths given by ``get_data_filepath`` and post-processed. The temporary
    clone is removed afterwards, whether or not processing succeeded.

    NOTE(review): ``create_directory_or_skip`` presumably skips the whole body
    when the dataset directory already exists; confirm against its definition.

    Returns:
        The dataset name, ``'turkcorpus_lower'``.
    """
    dataset = 'turkcorpus_lower'
    with create_directory_or_skip(get_dataset_dir(dataset)):
        url = 'https://github.com/cocoxu/simplification.git'
        output_dir = Path(tempfile.mkdtemp())
        try:
            git_clone(url, output_dir)
            print(output_dir)
            print('Processing...')
            # Only rename files and put them in local directory architecture
            turkcorpus_lower_dir = output_dir / 'data/turkcorpus'
            print(turkcorpus_lower_dir)
            for old_phase, new_phase in [('test', 'test'), ('tune', 'valid')]:
                # Source side ('norm') and single simplification ('simp').
                for old_language_name, new_language_name in [('norm', 'complex'), ('simp', 'simple')]:
                    _install_turkcorpus_file(
                        turkcorpus_lower_dir / f'{old_phase}.8turkers.tok.{old_language_name}',
                        get_data_filepath(dataset, new_phase, new_language_name),
                    )
                # The eight numbered 'turk' files for this phase.
                for i in range(8):
                    _install_turkcorpus_file(
                        turkcorpus_lower_dir / f'{old_phase}.8turkers.tok.turk.{i}',
                        get_data_filepath(dataset, new_phase, 'simple.turk', i=i),
                    )
        finally:
            # mkdtemp() never cleans up after itself; everything needed has
            # been copied out of the clone, so drop it (best effort).
            shutil.rmtree(output_dir, ignore_errors=True)
        print('Done.')
    return dataset