def get_baseline

def get_baseline_rows()

in muss/mining/training.py [0:0]
48 lines of code
3 McCabe index (conditional complexity)

def get_baseline_rows(orig_sents_path, refs_sents_paths, language):
    """Score a set of baseline "systems" and return one result row per baseline.

    Every row is the result of ``add_dicts`` applied to a small descriptor
    dict (``exp_name``, ``language`` and, for references, ``job_id``) and the
    scores returned by ``evaluate_system_output``.

    Rows are produced in this order:

    1. ``Identity``: the original sentences file is used as the system output.
    2. ``Truncate``: the system output is the file returned by
       ``apply_line_function_to_file`` when each line of the original file is
       passed through ``truncate(..., truncate_prop=0.2, language=language)``.
    3. ``Reference`` (only when more than one reference path is given): each
       reference file in turn is used as the system output and scored against
       the remaining references; ``job_id`` is ``ref_<index>``.

    Args:
        orig_sents_path: Path to the original (source) sentences.
        refs_sents_paths: Iterable of paths to reference files; it is
            materialised into a list once at the start.
        language: Language value forwarded to ``truncate`` and stored in
            every row.

    Returns:
        A list of row objects as returned by ``add_dicts``.
    """
    all_refs = list(refs_sents_paths)

    def score_against(sys_path, refs):
        # All baselines share the same evaluation settings; only the system
        # output and the set of references vary.
        return evaluate_system_output(
            'custom',
            sys_sents_path=sys_path,
            orig_sents_path=orig_sents_path,
            refs_sents_paths=refs,
            metrics=['sari', 'bleu', 'fkgl', 'sari_by_operation'],
            quality_estimation=False,
        )

    baseline_rows = []

    # Identity baseline: the source is returned unchanged.
    identity_scores = score_against(orig_sents_path, all_refs)
    baseline_rows.append(add_dicts({'exp_name': 'Identity', 'language': language}, identity_scores))

    # Truncate baseline: every source line goes through `truncate`.
    truncated_path = apply_line_function_to_file(
        lambda line: truncate(line, truncate_prop=0.2, language=language),
        orig_sents_path,
    )
    truncate_scores = score_against(truncated_path, all_refs)
    baseline_rows.append(add_dicts({'exp_name': 'Truncate', 'language': language}, truncate_scores))

    # Reference-vs-reference scoring needs at least one *other* reference.
    if len(all_refs) <= 1:
        return baseline_rows

    for ref_idx, held_out_path in enumerate(all_refs):
        # Leave the held-out reference out, but list its predecessor twice
        # (index -1 wraps to the last reference when ref_idx == 0), so the
        # list keeps the same length as `all_refs`.
        # NOTE(review): presumably done so scores stay comparable with the
        # baselines that are scored against the full reference set - confirm.
        remaining_refs = [
            all_refs[ref_idx - 1],
            *all_refs[:ref_idx],
            *all_refs[ref_idx + 1:],
        ]
        reference_scores = score_against(held_out_path, remaining_refs)
        descriptor = {
            'exp_name': 'Reference',
            'language': language,
            'job_id': f'ref_{ref_idx}',
        }
        baseline_rows.append(add_dicts(descriptor, reference_scores))

    return baseline_rows