in access/fairseq/main.py [0:0]
def fairseq_train_and_evaluate(dataset, metrics_coefs=None, parametrization_budget=64, **kwargs):
    """Train a fairseq model on *dataset*, then evaluate it on the TurkCorpus valid split.

    Args:
        dataset: Name of the dataset to train on (must pass ``check_dataset``).
        metrics_coefs: Three coefficients weighting BLEU, SARI and FKGL when
            combining them into a single score. Defaults to ``[1, 1, 1]``.
            (A ``None`` sentinel is used instead of a mutable list default so
            the default is never shared/mutated across calls.)
        parametrization_budget: Budget passed to ``find_best_parametrization``
            when searching for the best preprocessor parameters.
        **kwargs: Extra arguments; split between ``fairseq_train`` and
            ``fairseq_generate`` via ``get_allowed_kwargs``. May contain a
            ``'preprocessors_kwargs'`` dict describing preprocessing steps.

    Returns:
        The combined metric score computed by ``combine_metrics`` from the
        BLEU, SARI and FKGL scores on the valid phase.
    """
    if metrics_coefs is None:
        metrics_coefs = [1, 1, 1]
    check_dataset(dataset)
    kwargs = check_and_resolve_args(kwargs)
    exp_dir = prepare_exp_dir()
    preprocessors_kwargs = kwargs.get('preprocessors_kwargs', {})
    preprocessors = get_preprocessors(preprocessors_kwargs)
    if len(preprocessors) > 0:
        # Preprocess the dataset and keep the fitted preprocessors alongside
        # the experiment so the trained model can be reused consistently.
        dataset = create_preprocessed_dataset(dataset, preprocessors, n_jobs=1)
        shutil.copy(get_dataset_dir(dataset) / 'preprocessors.pickle', exp_dir)
    preprocessed_dir = fairseq_preprocess(dataset)
    train_kwargs = get_allowed_kwargs(fairseq_train, preprocessed_dir, exp_dir, **kwargs)
    fairseq_train(preprocessed_dir, exp_dir=exp_dir, **train_kwargs)
    # Evaluation: search for the best preprocessor parametrization, build a
    # simplifier from it, and score it on the TurkCorpus valid split.
    generate_kwargs = get_allowed_kwargs(fairseq_generate, 'complex_filepath', 'pred_filepath', exp_dir, **kwargs)
    recommended_preprocessors_kwargs = find_best_parametrization(exp_dir, metrics_coefs, preprocessors_kwargs,
                                                                parametrization_budget)
    print(f'recommended_preprocessors_kwargs={recommended_preprocessors_kwargs}')
    simplifier = get_simplifier(exp_dir, recommended_preprocessors_kwargs, generate_kwargs)
    scores = evaluate_simplifier_on_turkcorpus(simplifier, phase='valid')
    print(f'scores={scores}')
    score = combine_metrics(scores['BLEU'], scores['SARI'], scores['FKGL'], metrics_coefs)
    return score