def extract_features()

in tools/extract_features.py [0:0]


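# NOTE: module-level imports assumed by this function (a sketch). os, numpy,
# and the Caffe2 workspace are standard; cfg, model_builder, checkpoints,
# helpers, and logger are project modules whose exact import paths depend on
# the surrounding repo and are not shown here.
import os

import numpy as np

from caffe2.python import workspace
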
def extract_features(opts):
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    np.random.seed(cfg.RNG_SEED)

    # create the model in test mode only
    assert opts.data_type in ['train', 'val', 'test'], "Please specify a valid data type."
    model = model_builder.ModelBuilder(
        name='{}_test'.format(cfg.MODEL.MODEL_NAME), train=False,
        use_cudnn=True, cudnn_exhaustive_search=True, split=opts.data_type
    )
    model.build_model()
    model.create_net()
    model.start_data_loader()

    # initialize the model
    if cfg.TEST.PARAMS_FILE:
        checkpoints.load_model_from_params_file(
            model, params_file=cfg.TEST.PARAMS_FILE, checkpoint_dir=None)
    else:
        logger.info('=====WARN: No params file specified for testing model!')

    # initialize the dictionaries that store features and targets per blob
    img_features, img_targets = {}, {}
    for bl in cfg.MODEL.EXTRACT_BLOBS:
        img_features[bl], img_targets[bl] = {}, {}

    # keep track of the data indices seen so far, so that we extract features
    # for each image only once (the dedup check is the dict membership below)
    indices_list = []
    total_test_iters = helpers.get_num_test_iter(model.input_db)
    logger.info('Test epoch iters: {}'.format(total_test_iters))
    # when extracting features, we run 4 epochs to make sure we capture all
    # data points. This is needed because the multi-processing data loader
    # shuffles the data; in the very low-shot setting, making multiple passes
    # over the entire dataset becomes crucial.
    extraction_iters = int(total_test_iters * 4)
    for test_iter in range(extraction_iters):
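        # a single RunNet call is one forward pass, consuming one batch per device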
        workspace.RunNet(model.net.Proto().name)
        if test_iter == 0:
            helpers.print_net(model)
        if test_iter % 100 == 0:
            logger.info('at: [{}/{}]'.format(test_iter, extraction_iters))
        for device in range(cfg.NUM_DEVICES):
            indices = workspace.FetchBlob('gpu_{}/db_indices'.format(device))
            labels = workspace.FetchBlob('gpu_{}/labels'.format(device))
            num_images = indices.shape[0]
            indices_list.extend(list(indices))
            for bl in cfg.MODEL.EXTRACT_BLOBS:
                features = workspace.FetchBlob('gpu_{}/{}'.format(device, bl))
                for idx in range(num_images):
                    index = indices[idx]
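                    # record each image only the first time its index appears;
                    # indices repeat across the 4 shuffled extraction epochs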
                    if index not in img_features[bl]:
                        img_targets[bl][index] = labels[idx].reshape(-1)
                        img_features[bl][index] = features[idx]

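    # sort by image index so the saved features/targets/inds are deterministic
    # and mutually aligned (note: dict(sorted(...)) keeps the sorted order only
    # on Python 3.7+, where dicts preserve insertion order)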
    for bl in cfg.MODEL.EXTRACT_BLOBS:
        img_features[bl] = dict(sorted(img_features[bl].items()))
        img_targets[bl] = dict(sorted(img_targets[bl].items()))
        feats = np.array(list(img_features[bl].values()))
        N = feats.shape[0]
        logger.info('got image features: {} {}'.format(bl, feats.shape))
        output = {
            'img_features': feats.reshape(N, -1),
            'img_inds': np.array(list(img_features[bl].keys())),
            'img_targets': np.array(list(img_targets[bl].values())),
        }
        prefix = '{}_{}_'.format(opts.output_file_prefix, bl)
        out_feat_file = os.path.join(opts.output_dir, prefix + 'features.npy')
        out_target_file = os.path.join(opts.output_dir, prefix + 'targets.npy')
        out_inds_file = os.path.join(opts.output_dir, prefix + 'inds.npy')
        logger.info('Saving extracted features: {} {} to: {}'.format(
            bl, output['img_features'].shape, out_feat_file))
        np.save(out_feat_file, output['img_features'])
        logger.info('Saving extracted targets: {} to: {}'.format(
            output['img_targets'].shape, out_target_file))
        np.save(out_target_file, output['img_targets'])
        logger.info('Saving extracted indices: {} to: {}'.format(
            output['img_inds'].shape, out_inds_file))
        np.save(out_inds_file, output['img_inds'])

    logger.info('All Done!')
    # shut down the data loader
    model.data_loader.shutdown_dataloader()
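

# --- Usage sketch (illustrative, not necessarily the repo's actual CLI) ---
# A minimal driver for calling extract_features(). The flags mirror the opts
# attributes the function reads (data_type, output_file_prefix, output_dir);
# populating the global cfg (e.g. from a YAML config file) is repo-specific
# and omitted here.
import argparse
import sys


def main():
    parser = argparse.ArgumentParser(description='Extract features from a model')
    parser.add_argument('--data_type', type=str, default='test',
                        help='split to extract features for: train | val | test')
    parser.add_argument('--output_file_prefix', type=str, default='trunk',
                        help='prefix for the saved features/targets/inds files')
    parser.add_argument('--output_dir', type=str, required=True,
                        help='directory where the .npy outputs are written')
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    opts = parser.parse_args()
    extract_features(opts)


if __name__ == '__main__':
    main()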