def process()

in data/conceptual_captions/generate_conceptual_caption.py [0:0]


def process(example: Dict[str, List[str]]) -> Dict[str, List[Union[str, PIL.Image.Image]]]:
    """Fetch a batch of images in parallel and pair each one with its caption.

    Every URL in ``example["image_url"]`` is passed to ``fetch_and_resize``
    through a ``multiprocessing.Pool``. Results that come back as ``None``
    are dropped together with the caption at the same position, so the two
    output lists always have equal length.

    Args:
        example: Batch mapping with an ``"image_url"`` list and a
            ``"caption"`` list that is indexed by the same positions.

    Returns:
        A dict with ``"images"`` (the non-``None`` fetch results, in input
        order) and ``"text"`` (their matching captions).
    """
    # A new worker pool is created on every call and released when the
    # ``with`` block exits.
    with multiprocessing.Pool() as workers:
        # ``starmap`` expects one argument tuple per call, hence ``(url,)``.
        fetch_args = [(url,) for url in example["image_url"]]
        fetched = workers.starmap(fetch_and_resize, fetch_args)

    # Keep only successful fetches; the caption is looked up lazily so it is
    # touched only for positions whose image is actually kept.
    kept = [
        (picture, example["caption"][position])
        for position, picture in enumerate(fetched)
        if picture is not None
    ]

    return {
        "images": [picture for picture, _ in kept],
        "text": [caption for _, caption in kept],
    }