# evaluation/tiny_benchmark/maskrcnn_benchmark/data/build.py
import logging

import torch.utils.data

from maskrcnn_benchmark.utils.comm import get_world_size
from maskrcnn_benchmark.utils.imports import import_file

from .collate_batch import BatchCollator
from .transforms import build_transforms

# NOTE: build_dataset, make_data_sampler, and make_batch_data_sampler are
# defined earlier in this same module (build.py).


def make_data_loader(cfg, is_train=True, is_distributed=False, start_iter=0,
                     shuffle=None):  # add by hui
    num_gpus = get_world_size()
    if is_train:
        images_per_batch = cfg.SOLVER.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, (
            "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus)
        )
        images_per_gpu = images_per_batch // num_gpus
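        # e.g. with SOLVER.IMS_PER_BATCH = 16 on 8 GPUs, each GPU gets 2 images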
        if shuffle is None:
            shuffle = True
        num_iters = cfg.SOLVER.MAX_ITER
        # ############################## add by hui ########################################
        # balance_normal/normal_ratio control sampling of "normal"
        # (annotation-free) images; when balancing is enabled, images without
        # annotations must be kept in the dataset.
        balance_normal = cfg.DATALOADER.USE_TRAIN_BALANCE_NORMAL
        normal_ratio = cfg.DATALOADER.TRAIN_NORMAL_RATIO
        remove_images_without_annotations = not balance_normal
        filter_ignore = cfg.DATASETS.COCO_DATASET.TRAIN_FILTER_IGNORE
        ################################################################################
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        assert images_per_batch % num_gpus == 0, (
            "TEST.IMS_PER_BATCH ({}) must be divisible by the number "
            "of GPUs ({}) used.".format(images_per_batch, num_gpus)
        )
        images_per_gpu = images_per_batch // num_gpus
        if shuffle is None:
            shuffle = is_distributed
        num_iters = None
        start_iter = 0
        # ############################## add by hui ########################################
        balance_normal = cfg.DATALOADER.USE_TEST_BALANCE_NORMAL
        normal_ratio = cfg.DATALOADER.TEST_NORMAL_RATIO
        if balance_normal:
            shuffle = True
        remove_images_without_annotations = False
        filter_ignore = cfg.DATASETS.COCO_DATASET.TEST_FILTER_IGNORE
        ################################################################################
    if cfg.DATALOADER.DEBUG.CLOSE_SHUFFLE:  # add by hui
        shuffle = False

    if images_per_gpu > 1:
        logger = logging.getLogger(__name__)
        logger.warning(
            "When using more than one image per GPU you may encounter "
            "an out-of-memory (OOM) error if your GPU does not have "
            "sufficient memory. If this happens, you can reduce "
            "SOLVER.IMS_PER_BATCH (for training) or "
            "TEST.IMS_PER_BATCH (for inference). For training, you must "
            "also adjust the learning rate and schedule length according "
            "to the linear scaling rule. See for example: "
            "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        )
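    # Linear scaling rule, illustratively: halving the total batch size
    # (e.g. IMS_PER_BATCH 16 -> 8) typically goes with halving the base
    # learning rate and doubling the schedule length
    # (e.g. BASE_LR 0.02 -> 0.01, MAX_ITER 90000 -> 180000).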
    # Group images that have similar aspect ratios. Here we only use two
    # groups, width / height > 1 and width / height <= 1, but the code
    # supports more general grouping strategies.
    aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []
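    # For example, a 640x480 image (w/h = 1.33) and a 480x640 image
    # (w/h = 0.75) fall into different groups, so a batch never mixes
    # landscape and portrait images and padding waste is reduced.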
    paths_catalog = import_file(
        "maskrcnn_benchmark.config.paths_catalog", cfg.PATHS_CATALOG, True
    )
    DatasetCatalog = paths_catalog.DatasetCatalog
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST

    transforms = build_transforms(cfg, is_train)
    datasets = build_dataset(dataset_list, transforms, DatasetCatalog, is_train,
                             remove_images_without_annotations, filter_ignore)  # add by hui
    data_loaders = []
    for dataset in datasets:
        sampler = make_data_sampler(dataset, shuffle, is_distributed,
                                    balance_normal, normal_ratio)  # changed by hui
        batch_sampler = make_batch_data_sampler(
            dataset, sampler, aspect_grouping, images_per_gpu, num_iters, start_iter
        )
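        # When num_iters is not None (training), the batch sampler is wrapped
        # so that it yields exactly num_iters batches, resuming at start_iter.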
        collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
        num_workers = cfg.DATALOADER.NUM_WORKERS
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=num_workers,
            batch_sampler=batch_sampler,
            collate_fn=collator,
            timeout=30,  # add by hui for big batch
        )
        data_loaders.append(data_loader)
    if is_train:
        # during training, a single (possibly concatenated) data_loader is returned
        assert len(data_loaders) == 1
        return data_loaders[0]
    return data_loaders
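
# Minimal usage sketch (a sketch, not part of build.py; assumes a merged yacs
# config and, for multi-GPU runs, an initialized process group; the config
# path below is illustrative):
#
#     from maskrcnn_benchmark.config import cfg
#     from maskrcnn_benchmark.data import make_data_loader
#
#     cfg.merge_from_file("configs/e2e_faster_rcnn_R_50_FPN_1x.yaml")
#     cfg.freeze()
#
#     train_loader = make_data_loader(cfg, is_train=True)
#     test_loaders = make_data_loader(cfg, is_train=False)  # one loader per test dataset
#     for iteration, (images, targets, image_ids) in enumerate(train_loader):
#         ...  # forward / backward pass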