# Source path: lm_eval/tasks/alghafa/piqa_ar/piqa_ar.yaml
# Task configuration for `piqa_ar`: a two-option multiple-choice task whose
# prompt text is written in Arabic.
task: piqa_ar
dataset_path: Hennara/pica_ar
dataset_name: null
output_type: multiple_choice

# Only a test split is configured; training and validation are explicitly null.
training_split: null
validation_split: null
test_split: test

# Prompt template: an Arabic "Question:" label followed by the `goal` field,
# then a newline and an Arabic "Answer:" label. Double quotes are required so
# that `\n` is read as a newline escape rather than two literal characters.
doc_to_text: "السؤال: {{goal}}\nالجواب:"
# The two candidate answers are taken from the `sol1` and `sol2` fields. The
# value is quoted so the leading `{` is not parsed as a YAML flow mapping.
doc_to_choice: "{{[sol1, sol2]}}"
# NOTE(review): presumably `label` holds the index of the correct choice —
# confirm against the dataset schema.
doc_to_target: label

# Decontamination is switched on, with the `goal` field named as the query.
should_decontaminate: true
doc_to_decontamination_query: goal

# Each metric is its own sequence entry; `aggregation` and `higher_is_better`
# must be nested inside the entry they belong to, not at the top level.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true

metadata:
  # Left unquoted on purpose to preserve the existing value; YAML loads it as
  # the float 1.0.
  version: 1.0