in ubteacher/data/build.py [0:0]
def build_detection_semisup_train_loader(cfg, mapper=None):
    """Build the batched training data loader over the labeled split.

    The dataset dicts named by ``cfg.DATASETS.TRAIN`` are loaded, split into
    labeled and unlabeled parts by ``divide_label_unlabel``, and only the
    labeled part is wrapped, mapped, sampled and batched. The unlabeled part
    is not used by this function.

    Args:
        cfg: Config object exposing the ``DATASETS``, ``DATALOADER``,
            ``MODEL`` and ``SOLVER`` attributes read below.
        mapper: Callable handed to ``MapDataset`` for each dataset dict.
            When ``None``, ``DatasetMapper(cfg, True)`` is used.

    Returns:
        Whatever ``build_batch_data_loader`` returns for the labeled dataset,
        the selected sampler and ``cfg.SOLVER.IMS_PER_BATCH``.

    Raises:
        ValueError: If ``cfg.DATALOADER.SAMPLER_TRAIN`` is neither
            ``"TrainingSampler"`` nor ``"RepeatFactorTrainingSampler"``.
    """
    # Resolve the loader arguments up front, in the same order the config
    # attributes were originally read.
    train_names = cfg.DATASETS.TRAIN
    drop_empty = cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS

    if cfg.MODEL.KEYPOINT_ON:
        keypoint_floor = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
    else:
        keypoint_floor = 0

    if cfg.MODEL.LOAD_PROPOSALS:
        proposal_paths = cfg.DATASETS.PROPOSAL_FILES_TRAIN
    else:
        proposal_paths = None

    all_dicts = get_detection_dataset_dicts(
        train_names,
        filter_empty=drop_empty,
        min_keypoints=keypoint_floor,
        proposal_files=proposal_paths,
    )

    # Split by supervision percentage; only the labeled half feeds this loader.
    labeled_dicts, _unlabeled_dicts = divide_label_unlabel(
        all_dicts,
        cfg.DATALOADER.SUP_PERCENT,
        cfg.DATALOADER.RANDOM_DATA_SEED,
        cfg.DATALOADER.RANDOM_DATA_SEED_PATH,
    )

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    labeled_dataset = MapDataset(
        DatasetFromList(labeled_dicts, copy=False), mapper
    )

    log = logging.getLogger(__name__)
    sampler_kind = cfg.DATALOADER.SAMPLER_TRAIN
    log.info("Using training sampler {}".format(sampler_kind))

    # The two supported samplers are mutually exclusive names, so the order
    # of the checks does not matter.
    if sampler_kind == "RepeatFactorTrainingSampler":
        factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
            labeled_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
        )
        train_sampler = RepeatFactorTrainingSampler(factors)
    elif sampler_kind == "TrainingSampler":
        train_sampler = TrainingSampler(len(labeled_dataset))
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_kind))

    # Report the size of the labeled set and the configured supervision ratio.
    sample_count = len(labeled_dataset)
    log.info("Number of training samples " + str(sample_count))
    log.info("Supervision percentage " + str(cfg.DATALOADER.SUP_PERCENT))

    return build_batch_data_loader(
        labeled_dataset,
        train_sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )