mlebench/competitions/herbarium-2021-fgvc8/prepare.py [152:170]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Set the "annotations" and "images" entries of the new test metadata to
    # fresh empty lists (overwriting whatever was stored under those keys).
    new_test_metadata.update(
        {
            "annotations": [],
            "images": [],
        }
    )
    # Flatten and shuffle test set so that we don't have all the same categories in a row
    # (the per-category sublists are concatenated in the mapping's iteration order).
    test_annotations_images = [
        item for sublist in test_annotations_images_by_category.values() for item in sublist
    ]
    # A dedicated Random instance seeded with 0 shuffles the list in place, so the
    # resulting order is reproducible for a given input order and the global
    # `random` module state is not touched.
    random.Random(0).shuffle(test_annotations_images)
    for idx, annotation_image in tqdm(
        enumerate(test_annotations_images),
        desc="Creating new test dataset",
        total=len(test_annotations_images),
    ):

        # Make the new image id; for the test set this is just the index
        new_image_id = str(idx)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


mlebench/competitions/herbarium-2022-fgvc9/prepare.py [139:159]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Set the "annotations" and "images" entries of the new test metadata to
    # fresh empty lists (overwriting whatever was stored under those keys).
    new_test_metadata.update(
        {
            "annotations": [],
            "images": [],
        }
    )
    # Flatten and shuffle test set so that we don't have all the same categories in a row
    # (the per-category sublists are concatenated in the mapping's iteration order).
    test_annotations_images = [
        item for sublist in test_annotations_images_by_category.values() for item in sublist
    ]
    # A dedicated Random instance seeded with 0 shuffles the list in place, so the
    # resulting order is reproducible for a given input order and the global
    # `random` module state is not touched.
    random.Random(0).shuffle(test_annotations_images)
    for idx, annotation_image in tqdm(
        enumerate(test_annotations_images),
        desc="Creating new test dataset",
        total=len(test_annotations_images),
    ):
        # Update the image_id and file_name so that we don't have gaps in the image_id
        # (after doing train/test split, image ids are not contiguous within train and test)

        # Make the new image id; for the test set this is just the index
        new_image_id = str(idx)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -