in senteval/rank.py [0:0]
def run(self, params, batcher):
    """Embed the captions of every split and run the image/sentence ranker.

    For each split in ``self.coco_data`` the sentences are embedded with
    ``batcher`` in chunks of ``params.batch_size`` and paired with that
    split's image features. The 'train', 'dev' and 'test' results are then
    passed to ``ImageSentenceRankingPytorch`` and its scores are returned.

    ``self.coco_data`` is only read, never modified, so calling this method
    repeatedly yields sentence features aligned with the image features
    every time.

    Args:
        params: object exposing ``batch_size``; also forwarded unchanged to
            ``batcher``.
        batcher: callable invoked as ``batcher(params, batch)`` where
            ``batch`` is a list of sentences. It is expected to return one
            embedding row per sentence (the results are stacked with
            ``np.vstack`` and re-indexed per sentence).

    Returns:
        dict with keys:
            'devacc': first value returned by the ranker's ``run()``.
            'acc': ``[(r1, r5, r10, medr) image-to-text,
                      (r1, r5, r10, medr) text-to-image]``.
            'ndev': number of sentence embeddings in the 'dev' split.
            'ntest': number of sentence embeddings in the 'test' split.
    """
    coco_embed = {'train': {'sentfeat': [], 'imgfeat': []},
                  'dev': {'sentfeat': [], 'imgfeat': []},
                  'test': {'sentfeat': [], 'imgfeat': []}}

    for key in self.coco_data:
        logging.info('Computing embedding for {0}'.format(key))

        # Sort by sentence length to reduce padding inside each batch.
        # Work on a local copy: overwriting self.coco_data with the sorted
        # order would desynchronise it from 'imgfeat' on a later call.
        # The sentences stay in a plain list because converting ragged
        # token lists with np.array() fails on recent NumPy, and sorting
        # the values themselves would order them lexicographically rather
        # than by length.
        sentences = list(self.coco_data[key]['sent'])
        lengths = [len(sent) for sent in sentences]
        # 'mergesort' is stable, so equal-length sentences keep their
        # relative order and the batches are deterministic.
        idx_sort = np.argsort(lengths, kind='mergesort')
        # Inverse permutation: maps sorted positions back to the original
        # order so embeddings line up with 'imgfeat' again.
        idx_unsort = np.argsort(idx_sort)
        sorted_sents = [sentences[i] for i in idx_sort]

        # NOTE(review): 'X' is never read in this method; kept so the dicts
        # handed to the ranker are unchanged - confirm it can be dropped.
        coco_embed[key]['X'] = []

        batch_embeddings = []
        for ii in range(0, len(sorted_sents), params.batch_size):
            batch = sorted_sents[ii:ii + params.batch_size]
            batch_embeddings.append(batcher(params, batch))

        coco_embed[key]['sentfeat'] = np.vstack(batch_embeddings)[idx_unsort]
        coco_embed[key]['imgfeat'] = np.array(self.coco_data[key]['imgfeat'])
        logging.info('Computed {0} embeddings'.format(key))

    config = {'seed': self.seed, 'projdim': 1000, 'margin': 0.2}
    clf = ImageSentenceRankingPytorch(train=coco_embed['train'],
                                      valid=coco_embed['dev'],
                                      test=coco_embed['test'],
                                      config=config)

    (bestdevscore,
     r1_i2t, r5_i2t, r10_i2t, medr_i2t,
     r1_t2i, r5_t2i, r10_t2i, medr_t2i) = clf.run()

    # Adjacent literals are concatenated at compile time; unlike a backslash
    # continuation inside the string, source indentation cannot leak into
    # the logged message.
    logging.debug("\nTest scores | Image to text: "
                  "{0}, {1}, {2}, {3}".format(r1_i2t, r5_i2t,
                                              r10_i2t, medr_i2t))
    logging.debug("Test scores | Text to image: "
                  "{0}, {1}, {2}, {3}\n".format(r1_t2i, r5_t2i,
                                                r10_t2i, medr_t2i))

    return {'devacc': bestdevscore,
            'acc': [(r1_i2t, r5_i2t, r10_i2t, medr_i2t),
                    (r1_t2i, r5_t2i, r10_t2i, medr_t2i)],
            'ndev': len(coco_embed['dev']['sentfeat']),
            'ntest': len(coco_embed['test']['sentfeat'])}