def main()

in data/ok-vqa/ok-vqa.py [0:0]


def main(download_coco: bool = True) -> None:
    """Download OK-VQA and pack it into tar shards plus a metadata CSV.

    For every split in ``SPLITS`` the question and annotation archives (and,
    optionally, the COCO image archive) are fetched with
    ``download_and_unzip``. The questions are then walked in order: each
    referenced image is passed through ``resize_single_image``, re-encoded as
    JPEG and added to ``{DATA_DIR}/{split}/images_<n>.tar.gz``, where a new
    shard is started every ``FILES_PER_TAR`` questions. One
    ``file_name,text,idx`` row per question is appended to
    ``{DATA_DIR}/{split}/metadata.csv``; the header is written only when the
    file does not exist yet.

    Args:
        download_coco: When true, fetch the COCO image archive of each split
            first. When false, the images must already be available under
            ``{split}{YEAR}/`` relative to the working directory.

    Raises:
        AssertionError: If the question and the annotation at the same index
            disagree on ``question_id`` or ``image_id``.
    """
    # Local import so this function does not depend on the file's import block.
    import csv

    os.makedirs(DATA_DIR, exist_ok=True)
    for split in SPLITS:
        print(f"Processing split {split}")

        # The output directory is needed whether or not images are downloaded;
        # creating it only in the download branch breaks download_coco=False.
        split_dir = f"{DATA_DIR}/{split}"
        os.makedirs(split_dir, exist_ok=True)

        # Download images
        if download_coco:
            print("Downloading images file...")
            download_and_unzip(f"{BASE_COCO_URL}{split}{YEAR}.zip")

        # Download questions
        print("Downloading questions file...")
        questions_file = f"OpenEnded_mscoco_{split}{YEAR}_questions.json"
        download_and_unzip(BASE_OK_VQA_URL + questions_file + ".zip")

        # Download annotations
        print("Downloading annotations file...")
        annotations_file = f"mscoco_{split}{YEAR}_annotations.json"
        download_and_unzip(BASE_OK_VQA_URL + annotations_file + ".zip")

        # Load both JSON files up front so their handles are released before
        # the (long) image-processing loop starts.
        with open(questions_file, "r") as json_questions_file:
            questions = json.load(json_questions_file)["questions"]
        with open(annotations_file, "r") as json_annotations_file:
            annotations = json.load(json_annotations_file)["annotations"]

        metadata_path = f"{split_dir}/metadata.csv"
        write_header = not os.path.exists(metadata_path)

        tar = None
        try:
            # newline="" + lineterminator="\n" keeps the "\n" row endings the
            # file has always had while letting csv handle quoting.
            with open(metadata_path, "a", newline="") as metadata_file:
                writer = csv.writer(metadata_file, lineterminator="\n")
                if write_header:
                    writer.writerow(["file_name", "text", "idx"])

                for idx, question in enumerate(questions):
                    # Rotate to a new shard every FILES_PER_TAR questions.
                    if idx % FILES_PER_TAR == 0:
                        if tar is not None:
                            tar.close()
                        # NOTE(review): mode="w" writes an *uncompressed* tar
                        # even though the name ends in ".tar.gz"; kept as-is
                        # so existing consumers see the same on-disk format.
                        tar = TarFile(
                            f"{split_dir}/images_{idx // FILES_PER_TAR}.tar.gz",
                            mode="w",
                        )

                    annotation = annotations[idx]
                    # Explicit raises instead of `assert cond, print(...)`:
                    # the checks survive `python -O` and the exception
                    # actually carries the message.
                    if question["question_id"] != annotation["question_id"]:
                        raise AssertionError(
                            f"Question id doesn't match at index {idx}"
                        )
                    if question["image_id"] != annotation["image_id"]:
                        raise AssertionError(
                            f"Image id {question['image_id']} doesn't match "
                            f"at index {idx}"
                        )

                    text = f"Q: {question['question']} A: {annotation['answers'][0]['answer']}"
                    image_name = f"COCO_{split}{YEAR}_{question['image_id']:012d}.jpg"

                    # Encode inside both context managers so the source image
                    # file and the in-memory buffer are always released.
                    with PIL.Image.open(f"{split}{YEAR}/{image_name}") as source_image, \
                            io.BytesIO() as buffer:
                        image = resize_single_image(source_image)
                        image.save(buffer, format="JPEG")
                        info = tarfile.TarInfo(image_name)
                        info.size = buffer.getbuffer().nbytes
                        buffer.seek(0)
                        tar.addfile(info, fileobj=buffer)

                    # csv.writer quotes fields containing commas, quotes or
                    # newlines, which free-text questions/answers may have.
                    writer.writerow([image_name, text, idx])
        finally:
            # Close the last (or only) shard so the archive is finalized,
            # also when an error interrupts the loop.
            if tar is not None:
                tar.close()