in data/conceptual_captions/generate_conceptual_caption.py [0:0]
def process(example: Dict[str, List[str]]) -> Dict[str, List[Union[str, PIL.Image.Image]]]:
    """Turn a batch of image URLs and captions into a batch of images and text.

    Every entry of ``example["image_url"]`` is passed to ``fetch_and_resize``
    in a pool of worker processes. Entries whose result is ``None`` are
    dropped together with their caption; the remaining results keep their
    original relative order.

    Args:
        example: Batch mapping with an ``"image_url"`` list and a
            ``"caption"`` list that are read index-for-index.

    Returns:
        A dict with two parallel lists: ``"images"`` (the non-``None``
        results of ``fetch_and_resize``) and ``"text"`` (the caption found
        at the same index as each kept image).
    """
    # NOTE(review): presumably fetch_and_resize returns None when a download
    # or decode fails -- confirm against its definition.
    # A new pool is created for every call and released when the block exits.
    with multiprocessing.Pool() as workers:
        fetched = workers.map(fetch_and_resize, example["image_url"])

    # Positions whose fetch produced something usable.
    kept = [pos for pos, img in enumerate(fetched) if img is not None]

    # Captions are looked up by position so each one stays aligned with its
    # image after the failed entries are removed.
    return {
        "images": [fetched[pos] for pos in kept],
        "text": [example["caption"][pos] for pos in kept],
    }