in loader_vd/data_reader.py
def __init__(self, imdb, params):
"""Initialize by reading the data and pre-processing it.
"""
self.imdb = imdb
self.params = params
self.fetch_options = self.params.get('fetch_options', False)
self.preload_features = params['preload_features']
self.num_inst = len(self.imdb['data'])
self.num_rounds = len(self.imdb['data'][0]['question_ind'])
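    # one entry per dialog round (10 rounds per image in VisDial)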
# check if vgg features are to be used
self.use_vgg = 'vgg' in self.params['feature_path']
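    # e.g. a feature_path containing 'vgg' selects the HDF5 loading branch below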
# load vocabulary
vocab_path = params['text_vocab_path']
self.vocab_dict = text_processing.VocabDict(vocab_path)
self.T_encoder = params['max_enc_len']
# record special token ids
self.start_token_id = self.vocab_dict.word2idx('<start>')
self.end_token_id = self.vocab_dict.word2idx('<end>')
self.pad_token_id = self.vocab_dict.word2idx('<pad>')
    # peek at one example to check whether ground-truth layouts are present
    test_data = self.imdb['data'][0]
    self.load_gt_layout = 'gt_layout_tokens' in test_data
if 'load_gt_layout' in params:
self.load_gt_layout = params['load_gt_layout']
    # decide whether to load ground-truth text attention (supervision)
self.supervise_attention = params['supervise_attention']
self.T_decoder = params['max_dec_len']
self.assembler = params['assembler']
# load one feature map to peek its size
feats = np.load(self._adjust_image_dir(test_data['feature_path']))
self.feat_H, self.feat_W, self.feat_D = feats.shape[1:]
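    # feature maps are assumed to be stored as (1, H, W, D); [1:] drops the leading axis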
    # convert a list of words to vocabulary ids
self.digitizer = lambda x: [self.vocab_dict.word2idx(w) for w in x]
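    # e.g. self.digitizer(['what', 'color']) -> [vocab id of 'what', vocab id of 'color']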
if 'prog' in self.params['model']:
# preload features
if self.preload_features:
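            # NOTE: this keeps every feature map in memory; large splits may need several GB of RAM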
            img_paths = {ii['feature_path'] for ii in self.imdb['data']}
            self.img_feats = {ii: np.load(ii) for ii in progressbar(img_paths)}
# if VGG is to be used
if self.use_vgg:
# inform the dataloader to use self.img_feats
self.preload_features = True
            img_paths = {ii['feature_path'] for ii in self.imdb['data']}
# first read the index file
index_file = os.path.join(self.params['input_img'], 'img_id.json')
with open(index_file, 'r') as file_id:
index_data = json.load(file_id)
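            # index_data presumably maps split name -> ordered list of image ids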
            # infer the split (train / val) from any one feature path
            split = next(iter(img_paths)).split('/')[-2][:-4]
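            # e.g. '.../train2014/img.npy' -> 'train' (assuming COCO-style folder names)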
# read the features for that particular split
self.img_index = {img_id: index for index, img_id
in enumerate(index_data[split])}
feature_file = os.path.join(self.params['input_img'],
'data_img_%s.h5' % split)
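            # val-split features are stored under the 'images_test' key in the HDF5 file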
key = 'images_test' if split == 'val' else 'images_train'
            self.img_feats = h5py.File(feature_file, 'r')[key]
            # count how many images in img_paths are missing from img_index
            count = 0
            for ii in img_paths:
                img_id = '/'.join(ii.split('/')[-2:]).replace('.npy', '.jpg')
                if img_id not in self.img_index:
                    count += 1
            print('Missing: %d image features' % count)
            # update the feature dimensions to match the preloaded VGG features
self.feat_H, self.feat_W, self.feat_D = self.img_feats.shape[1:]
self.zero_feature = np.zeros((1,) + self.img_feats.shape[1:])
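            # zero_feature likely serves as a fallback for images missing from the index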
# use history if needed by the program generator
self.use_history = self.params['generator'] == 'mem'
if self.use_history:
self._construct_history()
    # construct facts if the model uses them
if self.params['use_fact']:
self._construct_fact()
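
A minimal construction sketch (hypothetical values: only the param keys are taken
from the reads above; the class name, imdb, and assembler objects are placeholders):

    params = {
        'preload_features': False,        # True caches every .npy feature in RAM
        'text_vocab_path': 'data/vocab.txt',
        'max_enc_len': 20,
        'max_dec_len': 10,
        'feature_path': 'features/resnet/',   # a 'vgg' path triggers the HDF5 branch
        'supervise_attention': False,
        'model': 'nmn',
        'assembler': assembler,           # program assembler instance (placeholder)
        'generator': 'ques',              # 'mem' additionally builds dialog history
        'use_fact': False,
        'input_img': 'data/visdial/',     # only read when VGG features are used
    }
    reader = DataReader(imdb, params)     # class name assumed from data_reader.py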