lm_eval/tasks/mmlusr/answer_only/_answer_only.yaml (44 lines of code) (raw):

# Top-level "Answer Only" group: bundles the four category subgroups listed
# under `task` and reports a single aggregated `acc` across them.
group: mmlusr_answer_only
group_alias: MMLU-SR (Answer Only)
task:
  # Each subgroup has the same shape: one task entry, an `acc` aggregate
  # with `weight_by_size` enabled, and its own metadata version.
  # NOTE(review): the `*_tasks` names are presumably tags defined by the
  # per-subject task configs elsewhere in this directory; confirm.
  - group: mmlusr_ao_stem
    group_alias: STEM (Answer Only)
    task:
      - mmlusr_answer_only_stem_tasks
    aggregate_metric_list:
      - metric: acc
        weight_by_size: true
    metadata:
      version: 1
  - group: mmlusr_ao_other
    group_alias: Other (Answer Only)
    task:
      - mmlusr_answer_only_other_tasks
    aggregate_metric_list:
      - metric: acc
        weight_by_size: true
    metadata:
      version: 1
  - group: mmlusr_ao_social_sciences
    group_alias: Social Sciences (Answer Only)
    task:
      - mmlusr_answer_only_social_sciences_tasks
    aggregate_metric_list:
      - metric: acc
        weight_by_size: true
    metadata:
      version: 1
  - group: mmlusr_ao_humanities
    group_alias: Humanities (Answer Only)
    task:
      - mmlusr_answer_only_humanities_tasks
    aggregate_metric_list:
      - metric: acc
        weight_by_size: true
    metadata:
      version: 1
# Aggregation for the top-level group itself, across the four subgroups.
# NOTE(review): `weight_by_size` presumably weights by subtask sample count
# rather than averaging subgroups equally; confirm against the harness docs.
aggregate_metric_list:
  - metric: acc
    weight_by_size: true
metadata:
  version: 1