# lm_eval/tasks/qa4mre/qa4mre_2011.yaml (21 lines of code) (raw)
# Task identity: the tag list and the task name.
tag:
  - qa4mre
task: qa4mre_2011

# Dataset source: path "qa4mre", configuration "2011.main.EN".
dataset_path: qa4mre
dataset_name: '2011.main.EN'

# Multiple-choice output; the split named "train" serves as the test split.
output_type: multiple_choice
test_split: train
# Prompt template: the stripped document, the question, then an "Answer:" cue.
# Written as a literal block scalar with strip chomping (|-), so the value has
# newlines between the three lines and no trailing newline.
#
# Disabled alternative that also lists the answer choices in the prompt:
# doc_to_text: "{{document_str.strip()}}\nQuestion: {{question_str}}\nChoices:\n- {{answer_choices|join('\n- ')}}\nAnswer:"
doc_to_text: |-
  {{document_str.strip()}}
  Question: {{question_str}}
  Answer:
# correct_answer_id is cast to int and then reduced by one (the Jinja filter
# binds before the subtraction) -- presumably converting a 1-based id into a
# 0-based index into the choices; confirm against the dataset.
doc_to_target: '{{correct_answer_id|int - 1}}'
# Candidate answers, taken from the answer_str field of answer_options.
doc_to_choice: '{{answer_options.answer_str}}'
# Decontamination settings: the flag is on, and the query template below is
# rendered for each document.
should_decontaminate: true
# Query text: the stripped document and the question, separated by one space.
# The separator is plain template text; anything written outside the {{ }}
# delimiters (such as a "+" operator) is emitted literally by Jinja rather
# than evaluated.
doc_to_decontamination_query: "{{document_str.strip()}} {{question_str}}"
# Metrics reported for this task. Each list item is one mapping holding the
# metric name, how per-document scores are aggregated, and whether a larger
# value is better. The aggregation and higher_is_better keys must be nested
# inside their "- metric:" item; at column 0 they would become duplicate
# top-level keys.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
# Task metadata. The version key is nested under metadata (otherwise metadata
# would be null and version a top-level key); the unquoted 1.0 parses as a float.
metadata:
  version: 1.0