in iep/data.py [0:0]
def __init__(self, question_h5, feature_h5, vocab, mode='prefix',
             image_h5=None, max_samples=None, question_families=None,
             image_idx_start_from=None):
    """Read the question data into memory, optionally filtered.

    Args:
        question_h5: Mapping-like object indexed by the keys
            'questions', 'image_idxs' and 'answers'; 'programs' is
            optional and 'question_families' is only read when
            ``question_families`` is given. (Presumably an open h5py
            file -- confirm against the caller.)
        feature_h5: Stored as ``self.feature_h5``; not read here.
        vocab: Stored as ``self.vocab``; not read here.
        mode: Either 'prefix' or 'postfix'; stored as ``self.mode``.
        image_h5: Stored as ``self.image_h5``; not read here.
        max_samples: Stored as ``self.max_samples``; not applied here.
        question_families: Optional sequence of family ids. When
            given, only entries whose 'question_families' value equals
            one of these ids are selected.
        image_idx_start_from: Optional lower bound. When given, only
            entries whose 'image_idxs' value is >= this bound are
            selected.

    When both filters are supplied an entry must satisfy both of them.

    Raises:
        ValueError: If ``mode`` is not 'prefix' or 'postfix'.
    """
    if mode not in ('prefix', 'postfix'):
        raise ValueError('Invalid mode "%s"' % mode)

    self.image_h5 = image_h5
    self.vocab = vocab
    self.feature_h5 = feature_h5
    self.mode = mode
    self.max_samples = max_samples

    # Boolean selection over the question entries; None means "no filter".
    mask = None

    if question_families is not None:
        # Use only the specified families
        all_families = np.asarray(question_h5['question_families'])
        print(question_families)
        # Column vector of wanted ids broadcasts against the row of stored
        # ids; an entry is kept if it matches any wanted id.
        target_families = np.asarray(question_families)[:, None]
        mask = (all_families == target_families).any(axis=0)

    if image_idx_start_from is not None:
        all_image_idxs = np.asarray(question_h5['image_idxs'])
        image_mask = all_image_idxs >= image_idx_start_from
        # Intersect with the family mask rather than replacing it, so that
        # supplying both filters does not silently drop the family filter.
        mask = image_mask if mask is None else (mask & image_mask)

    # Data from the question file is small, so read it all into memory
    print('Reading question data into memory')
    self.all_questions = _dataset_to_tensor(question_h5['questions'], mask)
    self.all_image_idxs = _dataset_to_tensor(question_h5['image_idxs'], mask)
    # Programs are optional in the question file.
    self.all_programs = None
    if 'programs' in question_h5:
        self.all_programs = _dataset_to_tensor(question_h5['programs'], mask)
    self.all_answers = _dataset_to_tensor(question_h5['answers'], mask)