lmms_eval/tasks/gqa/utils.py (18 lines of code) (raw):
from datasets import load_dataset

# Lazily-loaded GQA image split and an id -> PIL image lookup table, kept as module
# globals so the dataset is downloaded and indexed only once per process.
GQA_RAW_IMAGE_DATASET = None
GQA_ID2IMAGE = None


def gqa_doc_to_visual(doc):
    """Return the RGB image associated with a GQA question document."""
    global GQA_RAW_IMAGE_DATASET
    global GQA_ID2IMAGE
    if GQA_RAW_IMAGE_DATASET is None:
        # First call: fetch the balanced testdev image split and build the id -> image index.
        GQA_RAW_IMAGE_DATASET = load_dataset("lmms-lab/GQA", "testdev_balanced_images", split="testdev", token=True)
        GQA_ID2IMAGE = {}
        for row in GQA_RAW_IMAGE_DATASET:
            GQA_ID2IMAGE[row["id"]] = row["image"].convert("RGB")
    image = GQA_ID2IMAGE[doc["imageId"]]
    return [image]


def gqa_doc_to_text(doc, model_specific_prompt_kwargs):
    """Wrap the GQA question with the model-specific pre/post prompts."""
    question = doc["question"]
    pre_prompt = model_specific_prompt_kwargs["pre_prompt"]
    post_prompt = model_specific_prompt_kwargs["post_prompt"]
    return f"{pre_prompt}{question}{post_prompt}"
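# Usage sketch (illustrative, not part of the original file): lmms-eval invokes these
# helpers through the GQA task config as doc_to_visual / doc_to_text. The doc and
# prompt kwargs below are hypothetical placeholders showing the expected field names.
#
#   doc = {"imageId": "<gqa image id>", "question": "What color is the car?"}
#   prompt_kwargs = {"pre_prompt": "", "post_prompt": "\nAnswer the question using a single word or phrase."}
#   visuals = gqa_doc_to_visual(doc)             # -> [PIL.Image.Image in RGB mode]
#   prompt = gqa_doc_to_text(doc, prompt_kwargs)
#   # prompt == "What color is the car?\nAnswer the question using a single word or phrase."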