lm_eval/tasks/qa4mre/qa4mre_2011.yaml (21 lines of code) (raw):

# Task configuration for the QA4MRE 2011 English main set, evaluated as a
# multiple-choice task and scored with `acc` and `acc_norm`.
tag:
  - qa4mre
task: qa4mre_2011
dataset_path: qa4mre
dataset_name: 2011.main.EN
output_type: multiple_choice
# The split named `train` is used as the evaluation split.
# NOTE(review): presumably the dataset ships only this split - confirm.
test_split: train
# Alternative prompt that also lists the answer choices (kept for reference):
# doc_to_text: "{{document_str.strip()}}\nQuestion: {{question_str}}\nChoices:\n- {{answer_choices|join('\n- ')}}\nAnswer:"
doc_to_text: "{{document_str.strip()}}\nQuestion: {{question_str}}\nAnswer:"
# `|int` binds tighter than `-`, so this is int(correct_answer_id) - 1.
# NOTE(review): presumably converts a 1-based id to a 0-based index into
# `doc_to_choice` - confirm against the dataset schema.
doc_to_target: "{{correct_answer_id|int - 1}}"
doc_to_choice: "{{answer_options.answer_str}}"
should_decontaminate: true
# Document and question joined by a single space. Text outside `{{ }}` is
# emitted verbatim by Jinja, so the separator must be a plain space rather
# than a Python-style `+ ' ' +` expression.
doc_to_decontamination_query: "{{document_str.strip()}} {{question_str}}"
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0