def get_all_baseline_rows()

in muss/mining/training.py [0:0]


def get_all_baseline_rows():
    """Collect the baseline rows of every known evaluation set.

    A table mapping ``(dataset, phase)`` to ``(language, orig_sents_path,
    refs_sents_paths)`` is built first, so every path is resolved before any
    baseline is computed. Each entry is then handed to ``get_baseline_rows``
    and every row it returns is tagged in place with its ``'dataset'`` and
    ``'phase'``.

    Returns:
        A single list holding the tagged rows of all datasets, in the order
        the datasets are declared below.
    """
    phases = ('test', 'valid')
    # English sets are looked up in TEST_SETS_PATHS: (dataset label, key prefix).
    english_test_sets = (
        ('asset', 'asset'),
        ('turkcorpus_detokenized', 'turkcorpus'),
    )
    # Remaining sets are resolved with get_data_filepath: (dataset label, language).
    file_based_sets = (
        ('alector', 'fr'),
        # Old dataset with problems
        ('simplext_corpus_all', 'es'),
        ('simplext_corpus_all_fixed', 'es'),
        ('simpitiki', 'it'),
    )

    paths = {}
    for dataset, prefix in english_test_sets:
        for phase in phases:
            test_set = f'{prefix}_{phase}'
            paths[(dataset, phase)] = (
                'en',
                TEST_SETS_PATHS[(test_set, 'orig')],
                TEST_SETS_PATHS[(test_set, 'refs')],
            )
    for dataset, language in file_based_sets:
        for phase in phases:
            complex_path = get_data_filepath(dataset, phase, 'complex')
            simple_path = get_data_filepath(dataset, phase, 'simple')
            # These sets have a single reference file, wrapped in a list.
            paths[(dataset, phase)] = (language, complex_path, [simple_path])

    all_rows = []
    for key, spec in tqdm(paths.items()):
        dataset, phase = key
        language, orig_sents_path, refs_sents_paths = spec
        # The references are passed as a tuple, as in the original call.
        tagged_rows = get_baseline_rows(orig_sents_path, tuple(refs_sents_paths), language)
        for row in tagged_rows:
            row['dataset'] = dataset
            row['phase'] = phase
        all_rows.extend(tagged_rows)
    return all_rows