in access/fairseq/base.py [0:0]
def fairseq_preprocess(dataset):
    """Run the preprocessing entry point on ``dataset`` and return the output directory.

    The dataset directory is resolved with ``get_dataset_dir`` and held under
    ``lock_directory`` for the duration of the call. Output goes to a
    ``fairseq_preprocessed`` sub-directory of the dataset directory, using
    ``complex`` as source language, ``simple`` as target language and the
    ``raw`` output format.

    Args:
        dataset: Dataset name; also used as the file prefix of the
            ``<dataset>.train`` / ``.valid`` / ``.test`` inputs.

    Returns:
        The ``fairseq_preprocessed`` directory inside the dataset directory.
    """
    dataset_dir = get_dataset_dir(dataset)
    with lock_directory(dataset_dir):
        preprocessed_dir = dataset_dir / 'fairseq_preprocessed'
        # NOTE(review): presumably the body is skipped when the directory
        # already exists -- confirm against create_directory_or_skip.
        with create_directory_or_skip(preprocessed_dir):
            parser = options.get_preprocessing_parser()
            cli_args = ['--source-lang', 'complex', '--target-lang', 'simple']
            # One "--<split>pref <dataset_dir>/<dataset>.<split>" pair per split.
            split_flags = (
                ('--trainpref', 'train'),
                ('--validpref', 'valid'),
                ('--testpref', 'test'),
            )
            for flag, split in split_flags:
                cli_args.append(flag)
                cli_args.append(os.path.join(dataset_dir, f'{dataset}.{split}'))
            cli_args.extend(['--destdir', str(preprocessed_dir)])
            cli_args.extend(['--output-format', 'raw'])
            preprocess.main(parser.parse_args(cli_args))
        # Returned outside the inner block so the path is available whether or
        # not the preprocessing body actually ran.
        return preprocessed_dir