in lmms_eval/models/gpt4v.py [0:0]
def generate_until(self, requests) -> List[str]:
    """Send one chat-completion request per evaluation request and collect the replies.

    Each element of ``requests`` must expose ``.args`` as a 6-tuple
    ``(contexts, gen_kwargs, doc_to_visual, doc_id, task, split)``. The
    document is looked up as ``self.task_dict[task][split][doc_id]``, turned
    into visuals by ``doc_to_visual``, flattened with ``self.flatten`` and
    encoded with ``self.encode_image``.

    Message layout:
      * If ``self.image_token`` does not occur in ``contexts``, a single user
        message holds the full text followed by every image.
      * Otherwise ``contexts`` is split on the token; each chunk before a
        token becomes a user message holding that text plus the image at the
        same position (if one exists), and the chunk after the last token
        becomes the final user message. Images beyond the number of tokens
        are appended to that final message so none are lost.

    ``gen_kwargs`` is filled in place with defaults for ``max_new_tokens``,
    ``temperature``, ``top_p`` and ``num_beams``; only the first two are
    forwarded to the API (as ``max_tokens`` and ``temperature``).

    Returns:
        One string per request, in request order. A request whose every
        attempt fails contributes an empty string.
    """
    max_attempts = 5
    res = []
    pbar = tqdm(total=len(requests), disable=(self.rank != 0), desc="Model Responding")
    for contexts, gen_kwargs, doc_to_visual, doc_id, task, split in [reg.args for reg in requests]:
        visuals = self.flatten([doc_to_visual(self.task_dict[task][split][doc_id])])
        image_parts = [
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{self.encode_image(visual)}"}}
            for visual in visuals
        ]

        messages = []
        if self.image_token not in contexts:
            # No placeholder in the prompt: send the text first, then all images.
            messages.append({"role": "user", "content": [{"type": "text", "text": contexts}, *image_parts]})
        else:
            # n image tokens split the prompt into n + 1 chunks; the last
            # chunk is the text that follows the final token.
            *leading_chunks, trailing_chunk = contexts.split(self.image_token)
            for idx, chunk in enumerate(leading_chunks):
                parts = [{"type": "text", "text": chunk}]
                # Fewer images than tokens: keep the text, just without an image.
                if idx < len(image_parts):
                    parts.append(image_parts[idx])
                messages.append({"role": "user", "content": parts})
            # More images than tokens: attach the surplus after the trailing
            # text instead of indexing past the end of the chunk list.
            surplus_images = image_parts[len(leading_chunks):]
            messages.append({"role": "user", "content": [{"type": "text", "text": trailing_chunk}, *surplus_images]})

        payload = {"model": "gpt-4-vision-preview", "messages": messages}

        gen_kwargs.setdefault("max_new_tokens", 1024)
        gen_kwargs.setdefault("temperature", 0)
        gen_kwargs.setdefault("top_p", None)
        gen_kwargs.setdefault("num_beams", 1)
        # Forward the generation settings; without an explicit max_tokens the
        # endpoint falls back to its own default completion length.
        # NOTE(review): top_p / num_beams are defaulted above but not sent.
        payload["max_tokens"] = gen_kwargs["max_new_tokens"]
        payload["temperature"] = gen_kwargs["temperature"]

        content = ""
        for attempt in range(max_attempts):
            try:
                response = url_requests.post(API_URL, headers=headers, json=payload, timeout=20)
                response_data = response.json()
                content = response_data["choices"][0]["message"]["content"].strip()
                break  # Success: stop retrying.
            except Exception as e:
                # Best-effort by design: any failure (network, bad JSON,
                # missing "choices") is logged and retried.
                eval_logger.info(f"Attempt {attempt + 1} failed with error: {str(e)}")
                if attempt < max_attempts - 1:
                    time.sleep(NUM_SECONDS_TO_SLEEP)
                else:
                    eval_logger.error(f"All {max_attempts} attempts failed. Last error message: {str(e)}")
                    content = ""
        res.append(content)
        pbar.update(1)
    pbar.close()
    return res