# Source: lm_eval/tasks/cmmlu/_cmmlu.yaml
# Name under which the tasks listed below are bundled as one group.
group: cmmlu

# Members of the group, one task name per entry. These are references by
# name only; the referenced tasks are not defined in this section.
task:
  - cmmlu_agronomy
  - cmmlu_anatomy
  - cmmlu_ancient_chinese
  - cmmlu_arts
  - cmmlu_astronomy
  - cmmlu_business_ethics
  - cmmlu_chinese_civil_service_exam
  - cmmlu_chinese_driving_rule
  - cmmlu_chinese_food_culture
  - cmmlu_chinese_foreign_policy
  - cmmlu_chinese_history
  - cmmlu_chinese_literature
  - cmmlu_chinese_teacher_qualification
  - cmmlu_clinical_knowledge
  - cmmlu_college_actuarial_science
  - cmmlu_college_education
  - cmmlu_college_engineering_hydrology
  - cmmlu_college_law
  - cmmlu_college_mathematics
  - cmmlu_college_medical_statistics
  - cmmlu_college_medicine
  - cmmlu_computer_science
  - cmmlu_computer_security
  - cmmlu_conceptual_physics
  - cmmlu_construction_project_management
  - cmmlu_economics
  - cmmlu_education
  - cmmlu_electrical_engineering
  - cmmlu_elementary_chinese
  - cmmlu_elementary_commonsense
  - cmmlu_elementary_information_and_technology
  - cmmlu_elementary_mathematics
  - cmmlu_ethnology
  - cmmlu_food_science
  - cmmlu_genetics
  - cmmlu_global_facts
  - cmmlu_high_school_biology
  - cmmlu_high_school_chemistry
  - cmmlu_high_school_geography
  - cmmlu_high_school_mathematics
  - cmmlu_high_school_physics
  - cmmlu_high_school_politics
  - cmmlu_human_sexuality
  - cmmlu_international_law
  - cmmlu_journalism
  - cmmlu_jurisprudence
  - cmmlu_legal_and_moral_basis
  - cmmlu_logical
  - cmmlu_machine_learning
  - cmmlu_management
  - cmmlu_marketing
  - cmmlu_marxist_theory
  - cmmlu_modern_chinese
  - cmmlu_nutrition
  - cmmlu_philosophy
  - cmmlu_professional_accounting
  - cmmlu_professional_law
  - cmmlu_professional_medicine
  - cmmlu_professional_psychology
  - cmmlu_public_relations
  - cmmlu_security_study
  - cmmlu_sociology
  - cmmlu_sports_science
  - cmmlu_traditional_chinese_medicine
  - cmmlu_virology
  - cmmlu_world_history
  - cmmlu_world_religions
# Group-level metrics: each entry names one metric and the aggregation used
# to combine it across the group's tasks.
aggregate_metric_list:
  # `acc`, combined with a mean.
  - aggregation: mean
    metric: acc
    # NOTE(review): presumably weights each task by its number of examples
    # instead of averaging tasks equally - confirm against the harness docs.
    weight_by_size: true
  # `acc_norm`, combined the same way as `acc`.
  - aggregation: mean
    metric: acc_norm
    weight_by_size: true
# Metadata attached to this group configuration.
metadata:
  # Left as an unquoted float so the value keeps its existing type.
  # NOTE(review): confirm whether the consumer expects a number or a string.
  version: 0.0