lm_eval/tasks/hellaswag/hellaswag.yaml (24 lines of code) (raw):
tag:
- multiple_choice
task: hellaswag
dataset_path: hellaswag
dataset_name: null
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: null
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{label}}"
doc_to_choice: "choices"
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
dataset_kwargs:
trust_remote_code: true