lmms_eval/tasks/mmbench/mmbench_cc.yaml (33 lines of code) (raw):

# Task configuration for the "mmbench_cn_cc" evaluation task.
# Callables are referenced through the custom `!function` tag and live in the
# sibling `cc_utils` module (not shown here).

# Dataset location and configuration name.
# NOTE(review): presumably a Hugging Face Hub dataset id - confirm.
dataset_path: lmms-lab/MMBench
dataset_name: cc
dataset_kwargs:
  # NOTE(review): presumably forwarded to the dataset loader to enable
  # authenticated access - confirm against the loader.
  token: true

task: "mmbench_cn_cc"
test_split: test
output_type: generate_until

# Hooks that turn a dataset record into model inputs and the reference answer.
doc_to_visual: !function cc_utils.mmbench_doc_to_visual
doc_to_text: !function cc_utils.mmbench_cn_cc_doc_to_text
doc_to_target: "answer"

# Decoding settings: sampling disabled, single beam, at most 256 new tokens.
generation_kwargs:
  max_new_tokens: 256
  temperature: 0
  top_p: 0
  num_beams: 1
  do_sample: false

# Post-processing of each model response before aggregation.
process_results: !function cc_utils.mmbench_cn_cc_process_results

# Each metric names the aggregation function applied to the processed results.
metric_list:
  - metric: gpt_eval_score
    aggregation: !function cc_utils.mmbench_cn_cc_aggregate_dev_results_eval
    higher_is_better: true
  - metric: submission
    aggregation: !function cc_utils.mmbench_cn_cc_aggregate_results

metadata:
  # Parsed as a float; kept unquoted so the loaded type is unchanged.
  version: 0.0
  # NOTE(review): assumed to belong under `metadata` - confirm where the
  # consumer reads this key.
  gpt_eval_model_name: "gpt-3.5-turbo-0613"

# Prompt text wrapped around the question; `default` has an empty prefix and a
# Chinese-language suffix that begins with a newline.
model_specific_prompt_kwargs:
  default:
    pre_prompt: ""
    post_prompt: "\n请直接使用所提供的选项字母作为答案回答。"

# Per-model generation overrides.
model_specific_generation_kwargs:
  llava:
    image_aspect_ratio: original