lmms_eval/tasks/gqa/utils.py (18 lines of code) (raw):

from datasets import load_dataset

# Lazily populated module-level caches so the GQA image split is only
# downloaded and indexed once per process.
GQA_RAW_IMAGE_DATASET = None
GQA_ID2IMAGE = None


def gqa_doc_to_visual(doc):
    global GQA_RAW_IMAGE_DATASET
    global GQA_ID2IMAGE
    if GQA_RAW_IMAGE_DATASET is None:
        # Load the balanced testdev image split and build an id -> RGB image map.
        GQA_RAW_IMAGE_DATASET = load_dataset("lmms-lab/GQA", "testdev_balanced_images", split="testdev", token=True)
        GQA_ID2IMAGE = {}
        for row in GQA_RAW_IMAGE_DATASET:
            GQA_ID2IMAGE[row["id"]] = row["image"].convert("RGB")
    image = GQA_ID2IMAGE[doc["imageId"]]
    return [image]


def gqa_doc_to_text(doc, model_specific_prompt_kwargs):
    # Wrap the raw GQA question with the model-specific pre/post prompts.
    question = doc["question"]
    pre_prompt = model_specific_prompt_kwargs["pre_prompt"]
    post_prompt = model_specific_prompt_kwargs["post_prompt"]
    return f"{pre_prompt}{question}{post_prompt}"
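

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original file). The doc dict and
# prompt kwargs below are assumed example values that mirror the keys the
# functions above read; gqa_doc_to_visual is only described in a comment
# because calling it triggers a full download of the GQA testdev image split.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    example_doc = {"imageId": "2354786", "question": "What is the person holding?"}
    example_kwargs = {"pre_prompt": "", "post_prompt": "\nAnswer the question using a single word or phrase."}

    # Builds the final prompt string that would be passed to the model.
    print(gqa_doc_to_text(example_doc, example_kwargs))

    # gqa_doc_to_visual(example_doc) would lazily load the image split and
    # return a single-element list containing the RGB PIL image for imageId.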