in muss/mining/training.py [0:0]
def get_baseline_rows(orig_sents_path, refs_sents_paths, language):
    """Evaluate simple baselines and return one result row per baseline.

    Three kinds of rows are produced, in this order:

    * ``Identity``: the source sentences are scored as if they were the
      system output.
    * ``Truncate``: each source line is passed through ``truncate`` with
      ``truncate_prop=0.2`` and the resulting file is scored.
    * ``Reference``: only when more than one reference path is given, each
      reference file is scored as a system output against the remaining
      references (one row per reference, tagged with ``job_id`` ``ref_<i>``).

    Args:
        orig_sents_path: Path to the source sentences file.
        refs_sents_paths: Iterable of reference file paths; it is copied
            into a list before use.
        language: Stored in every row and forwarded to ``truncate``.

    Returns:
        A list of rows, each the result of ``add_dicts`` applied to the row
        metadata and the scores returned by ``evaluate_system_output``.
    """
    ref_paths = list(refs_sents_paths)

    def score(sys_sents_path, references):
        # All baselines share the same evaluation settings; only the system
        # output and the reference set differ between calls.
        return evaluate_system_output(
            'custom',
            sys_sents_path=sys_sents_path,
            orig_sents_path=orig_sents_path,
            refs_sents_paths=references,
            metrics=['sari', 'bleu', 'fkgl', 'sari_by_operation'],
            quality_estimation=False,
        )

    # Baseline 1: the unmodified source acts as the system output.
    identity_scores = score(orig_sents_path, ref_paths)
    rows = [add_dicts({'exp_name': 'Identity', 'language': language}, identity_scores)]

    # Baseline 2: a truncated copy of the source acts as the system output.
    truncated_path = apply_line_function_to_file(
        lambda sentence: truncate(sentence, truncate_prop=0.2, language=language), orig_sents_path
    )
    truncate_scores = score(truncated_path, ref_paths)
    rows.append(add_dicts({'exp_name': 'Truncate', 'language': language}, truncate_scores))

    # Reference-vs-reference scoring needs at least two references.
    if len(ref_paths) <= 1:
        return rows

    for idx, held_out_path in enumerate(ref_paths):
        # Leave the held-out reference out and prepend a duplicate of the
        # preceding one (index -1 wraps to the last reference when idx == 0).
        # NOTE(review): presumably this keeps the number of references equal
        # to the other baselines - confirm.
        other_refs = [ref_paths[idx - 1]] + ref_paths[:idx] + ref_paths[idx + 1 :]
        reference_scores = score(held_out_path, other_refs)
        metadata = {
            'exp_name': 'Reference',
            'language': language,
            'job_id': f'ref_{idx}',
        }
        rows.append(add_dicts(metadata, reference_scores))
    return rows