def fairseq_preprocess()

in access/fairseq/base.py [0:0]


def fairseq_preprocess(dataset):
    """Run the preprocessing entry point for ``dataset`` and return its output directory.

    The split prefixes passed to the preprocessing parser are
    ``<dataset_dir>/<dataset>.train``, ``.valid`` and ``.test``, with
    ``complex`` as the source language and ``simple`` as the target language.
    The destination is ``<dataset_dir>/fairseq_preprocessed`` and the output
    format is ``raw``.

    Args:
        dataset: Dataset name. It is resolved to a directory through
            ``get_dataset_dir`` and also used as the stem of each split prefix.

    Returns:
        The ``fairseq_preprocessed`` path inside the dataset directory.
    """
    root = get_dataset_dir(dataset)
    split_flags = (
        ('--trainpref', 'train'),
        ('--validpref', 'valid'),
        ('--testpref', 'test'),
    )
    # The whole operation runs while the dataset directory is locked.
    with lock_directory(root):
        output_dir = root / 'fairseq_preprocessed'
        # NOTE(review): presumably the body is skipped when the directory
        # already exists -- confirm against create_directory_or_skip.
        with create_directory_or_skip(output_dir):
            parser = options.get_preprocessing_parser()
            cli_args = ['--source-lang', 'complex', '--target-lang', 'simple']
            for flag, split in split_flags:
                cli_args.append(flag)
                cli_args.append(os.path.join(root, f'{dataset}.{split}'))
            cli_args.extend(['--destdir', str(output_dir)])
            cli_args.extend(['--output-format', 'raw'])
            preprocess.main(parser.parse_args(cli_args))
        return output_dir