# lm_eval/tasks/alghafa/piqa_ar/piqa_ar.yaml (21 lines of code) (raw):

---
# Task configuration for the `piqa_ar` multiple-choice task.
# Each example supplies a `goal` (the prompt) and two candidate
# solutions (`sol1`, `sol2`); `label` selects the correct one.

task: piqa_ar

# Dataset source.
# NOTE(review): the dataset id is spelled "pica_ar" while the task is
# named "piqa_ar" — confirm this is the intended repository name.
dataset_path: Hennara/pica_ar
dataset_name: null

output_type: multiple_choice

# Only a test split is configured; training and validation are unset.
training_split: null
validation_split: null
test_split: test

# Prompt template. The Arabic text reads "Question: {{goal}}" followed by
# a newline and "Answer:". Double quotes are required so that `\n` is
# interpreted as a newline escape.
doc_to_text: "السؤال: {{goal}}\nالجواب:"
# Quoted so the leading `{{` is not parsed as a YAML flow mapping.
doc_to_choice: "{{[sol1, sol2]}}"
doc_to_target: label

# Decontamination is enabled and uses the `goal` field as its query.
should_decontaminate: true
doc_to_decontamination_query: goal

# Reported metrics: both are averaged, and higher values are better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true

metadata:
  # Kept as an unquoted number, exactly as in the original, so the
  # consumer sees the same type as before.
  version: 1.0