# lm_eval/tasks/tinyBenchmarks/tinyMMLU.yaml
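# Task config for tinyMMLU, the compact MMLU subset published by the
# tinyBenchmarks project. Per-example scores are aggregated with a custom
# IRT-based estimator (see agg_functions.py in this directory) rather than
# a plain mean.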
task: tinyMMLU
dataset_path: tinyBenchmarks/tinyMMLU
dataset_name: all
test_split: test
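# Few-shot demonstrations, if requested, are drawn in order from the dev
# split (first_n sampler); the task itself defaults to zero-shot via
# num_fewshot: 0 below.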
fewshot_split: dev
fewshot_config:
  sampler: first_n
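# The dataset ships a pre-formatted prompt in its input_formatted field;
# the model is scored on the log-likelihood of each answer letter, with the
# answer field giving the gold choice.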
output_type: multiple_choice
doc_to_text: "{{input_formatted}}"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: answer
num_fewshot: 0
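# acc_norm is computed per example and then aggregated by the !function
# hook below, which lm-eval resolves to agg_gpirt_mmlu in agg_functions.py
# next to this file (presumably the gp-IRT score estimator from the
# tinyBenchmarks paper).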
metric_list:
  - metric: acc_norm
    aggregation: !function agg_functions.agg_gpirt_mmlu
    higher_is_better: true
metadata:
  version: 0.0