# lmms_eval/tasks/scienceqa/utils.py
def sqa_doc_to_text(doc, model_specific_prompt_kwargs=None):
    if model_specific_prompt_kwargs is None:
        # Assumed fallback so the None default does not crash on the lookups
        # below; callers normally pass these kwargs explicitly.
        model_specific_prompt_kwargs = {"format": "default", "pre_prompt": "", "post_prompt": ""}
    context, question, choices = doc["hint"], doc["question"], doc["choices"]
    len_choices = len(choices)
    # Label the choices "A", "B", "C", ... and render them one per line.
    options = [chr(ord("A") + i) for i in range(len_choices)]
    choices_str = "\n".join([f"{option}. {choice}" for option, choice in zip(options, choices)])
    if model_specific_prompt_kwargs["format"] == "default":
        if context:
            context = f"Context: {context}\n"
        post_prompt = model_specific_prompt_kwargs["post_prompt"]
        pre_prompt = model_specific_prompt_kwargs["pre_prompt"]
        return f"{pre_prompt}{context}{question}\n{choices_str}{post_prompt}"
    elif model_specific_prompt_kwargs["format"] == "qwen_vl":
        prompt = "Context: {}\nQuestion: {}\nOptions: {}\nAnswer:"
        context = context if context else "N/A"
        prompt = prompt.format(context, question, choices_str)
        return prompt
    else:
        raise ValueError(f"Unknown prompt format: {model_specific_prompt_kwargs['format']}")
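
# Example (illustrative, not from the dataset): with hypothetical kwargs
# {"format": "default", "pre_prompt": "", "post_prompt": "\nAnswer with the option's letter."}
# and a doc whose hint is empty, question is "Which is a liquid?", and choices are
# ["milk", "rock"], sqa_doc_to_text would return:
#
#   Which is a liquid?
#   A. milk
#   B. rock
#   Answer with the option's letter.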


def sqa_doc_to_visual(doc):
    # Text-only questions carry no image; return an empty visual list for them.
    if doc["image"] is None:
        return []
    return [doc["image"].convert("RGB")]
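
# Note (assumption): doc["image"] is expected to be a PIL.Image.Image (or None),
# so callers receive either [] or a single-element list with an RGB-converted image.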


def sqa_doc_to_target(doc):
    len_choices = len(doc["choices"])
    options = [chr(ord("A") + i) for i in range(len_choices)]
    # doc["answer"] is the 0-based index of the correct choice.
    return options[doc["answer"]]
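
# Example (illustrative): for a doc with three choices and answer index 2,
# sqa_doc_to_target returns "C".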


def sqa_process_results(doc, results):
    # I know this is weird, but it's how llava parses it.
    target = sqa_doc_to_target(doc)
    pred = results[0]
    if pred == target:
        return {"exact_match": 1.0}
    # Also accept answers shaped like "A. <choice text>" and grade on the letter.
    # pattern: ^[A-Z]\. .*
    if len(pred) >= 2 and pred[0].isupper() and pred[1] == ".":
        result = 1.0 if pred[0] == target else 0.0
        return {"exact_match": result}
    return {"exact_match": 0.0}