lm_eval/tasks/hellaswag/hellaswag.yaml (24 lines of code) (raw):

# Task configuration for the `hellaswag` task (output type: multiple_choice).

# Tags this task is listed under.
tag:
  - multiple_choice
task: hellaswag

# Dataset location; `dataset_name: null` means no named configuration is set.
dataset_path: hellaswag
dataset_name: null
output_type: multiple_choice

# Dataset splits. No test split is configured (`test_split: null`).
training_split: train
validation_split: validation
test_split: null

# `!function` is a custom tag that the consuming loader must resolve to a
# callable. `utils.process_docs` is defined outside this file -- presumably
# it prepares the `query` / `choices` / `label` fields referenced below;
# verify against utils.py.
process_docs: !function utils.process_docs

# Templated prompt and target. The templates stay quoted so that the leading
# `{` is not parsed as a YAML flow mapping.
doc_to_text: "{{query}}"
doc_to_target: "{{label}}"
# NOTE(review): plain field name rather than a `{{...}}` template --
# presumably the name of the document field holding the answer options;
# confirm.
doc_to_choice: "choices"

# Reported metrics: both are averaged, and larger values are better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true

metadata:
  # Unquoted, so YAML parses this as the float 1.0 rather than a string.
  version: 1.0

# NOTE(review): presumably forwarded to the dataset loader, where
# `trust_remote_code: true` would permit running code shipped with the
# dataset repository -- confirm this is still required.
dataset_kwargs:
  trust_remote_code: true