in tools/extract_features.py [0:0]
def extract_features(opts):
    """Run a trained model over one data split and save per-image features.

    For every blob name in ``cfg.MODEL.EXTRACT_BLOBS``, runs the test net over
    the split named by ``opts.data_type`` and writes three ``.npy`` files into
    ``opts.output_dir``:

    - ``<prefix>_<blob>_features.npy`` — features flattened to ``(N, -1)``,
      sorted by image index
    - ``<prefix>_<blob>_targets.npy``  — the matching labels
    - ``<prefix>_<blob>_inds.npy``     — the sorted image indices

    Args:
        opts: parsed command-line options; must provide ``data_type`` (one of
            'train' / 'val' / 'test'), ``output_dir`` and
            ``output_file_prefix``.

    Raises:
        AssertionError: if ``opts.data_type`` is not a valid split name.
            (NOTE(review): ``assert`` is stripped under ``python -O``; kept
            as-is so callers catching AssertionError are unaffected.)
    """
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    np.random.seed(cfg.RNG_SEED)
    # create the model in test mode only
    assert opts.data_type in ['train', 'val', 'test'], "Please specify valid type."
    model = model_builder.ModelBuilder(
        name='{}_test'.format(cfg.MODEL.MODEL_NAME), train=False,
        use_cudnn=True, cudnn_exhaustive_search=True, split=opts.data_type
    )
    model.build_model()
    model.create_net()
    model.start_data_loader()
    # initialize the model weights; without a params file the extracted
    # features come from a randomly-initialized net, hence the loud warning
    if cfg.TEST.PARAMS_FILE:
        checkpoints.load_model_from_params_file(
            model, params_file=cfg.TEST.PARAMS_FILE, checkpoint_dir=None)
    else:
        logger.info('=====WARN: No params files specified for testing model!')
    total_test_iters = helpers.get_num_test_iter(model.input_db)
    logger.info('Test epoch iters: {}'.format(total_test_iters))
    # when we extract features, we run 4 epochs to make sure we capture all
    # the data points. This is needed because we use the multi-processing
    # dataloader which shuffles the data. In very low-shot setting, making
    # multiple passes over the entire data becomes crucial.
    extraction_iters = int(total_test_iters * 4)
    img_features, img_targets = _collect_outputs(model, extraction_iters)
    for bl in cfg.MODEL.EXTRACT_BLOBS:
        _save_blob_outputs(opts, bl, img_features[bl], img_targets[bl])
    logger.info('All Done!')
    # shut down the data loader
    model.data_loader.shutdown_dataloader()


def _collect_outputs(model, extraction_iters):
    """Run the net ``extraction_iters`` times and gather per-image outputs.

    Returns two dicts keyed by blob name, each mapping image index ->
    feature array / flattened label vector. The shuffling dataloader may
    yield the same image on several passes; only the first occurrence of
    each index is recorded, so every image contributes exactly once.
    """
    img_features = {bl: {} for bl in cfg.MODEL.EXTRACT_BLOBS}
    img_targets = {bl: {} for bl in cfg.MODEL.EXTRACT_BLOBS}
    for test_iter in range(extraction_iters):
        workspace.RunNet(model.net.Proto().name)
        if test_iter == 0:
            helpers.print_net(model)
        if test_iter % 100 == 0:
            logger.info('at: [{}/{}]'.format(test_iter, extraction_iters))
        for device in range(cfg.NUM_DEVICES):
            indices = workspace.FetchBlob('gpu_{}/db_indices'.format(device))
            labels = workspace.FetchBlob('gpu_{}/labels'.format(device))
            num_images = indices.shape[0]
            for bl in cfg.MODEL.EXTRACT_BLOBS:
                features = workspace.FetchBlob('gpu_{}/{}'.format(device, bl))
                for idx in range(num_images):
                    index = indices[idx]
                    # keep only the first occurrence of each image index
                    if index not in img_features[bl]:
                        img_targets[bl][index] = labels[idx].reshape(-1)
                        img_features[bl][index] = features[idx]
    return img_features, img_targets


def _save_blob_outputs(opts, bl, features_by_index, targets_by_index):
    """Sort one blob's outputs by image index and write the three .npy files."""
    features_by_index = dict(sorted(features_by_index.items()))
    targets_by_index = dict(sorted(targets_by_index.items()))
    feats = np.array(list(features_by_index.values()))
    N = feats.shape[0]
    logger.info('got image features: {} {}'.format(bl, feats.shape))
    output = {
        'img_features': feats.reshape(N, -1),
        'img_inds': np.array(list(features_by_index.keys())),
        'img_targets': np.array(list(targets_by_index.values())),
    }
    prefix = '{}_{}_'.format(opts.output_file_prefix, bl)
    out_feat_file = os.path.join(opts.output_dir, prefix + 'features.npy')
    out_target_file = os.path.join(opts.output_dir, prefix + 'targets.npy')
    out_inds_file = os.path.join(opts.output_dir, prefix + 'inds.npy')
    logger.info('Saving extracted features: {} {} to: {}'.format(
        bl, output['img_features'].shape, out_feat_file))
    np.save(out_feat_file, output['img_features'])
    logger.info('Saving extracted targets: {} to: {}'.format(
        output['img_targets'].shape, out_target_file))
    np.save(out_target_file, output['img_targets'])
    logger.info('Saving extracted indices: {} to: {}'.format(
        output['img_inds'].shape, out_inds_file))
    np.save(out_inds_file, output['img_inds'])