in senteval/sick.py [0:0]
def run(self, params, batcher):
sick_embed = {'train': {}, 'dev': {}, 'test': {}}
bsize = params.batch_size
for key in self.sick_data:
logging.info('Computing embedding for {0}'.format(key))
# Sort to reduce padding
sorted_corpus = sorted(zip(self.sick_data[key]['X_A'],
self.sick_data[key]['X_B'],
self.sick_data[key]['y']),
key=lambda z: (len(z[0]), len(z[1]), z[2]))
self.sick_data[key]['X_A'] = [x for (x, y, z) in sorted_corpus]
self.sick_data[key]['X_B'] = [y for (x, y, z) in sorted_corpus]
self.sick_data[key]['y'] = [z for (x, y, z) in sorted_corpus]
for txt_type in ['X_A', 'X_B']:
sick_embed[key][txt_type] = []
for ii in range(0, len(self.sick_data[key]['y']), bsize):
batch = self.sick_data[key][txt_type][ii:ii + bsize]
embeddings = batcher(params, batch)
sick_embed[key][txt_type].append(embeddings)
sick_embed[key][txt_type] = np.vstack(sick_embed[key][txt_type])
logging.info('Computed {0} embeddings'.format(key))
# Train
trainA = sick_embed['train']['X_A']
trainB = sick_embed['train']['X_B']
trainF = np.c_[np.abs(trainA - trainB), trainA * trainB]
trainY = np.array(self.sick_data['train']['y'])
# Dev
devA = sick_embed['dev']['X_A']
devB = sick_embed['dev']['X_B']
devF = np.c_[np.abs(devA - devB), devA * devB]
devY = np.array(self.sick_data['dev']['y'])
# Test
testA = sick_embed['test']['X_A']
testB = sick_embed['test']['X_B']
testF = np.c_[np.abs(testA - testB), testA * testB]
testY = np.array(self.sick_data['test']['y'])
config = {'nclasses': 3, 'seed': self.seed,
'usepytorch': params.usepytorch,
'classifier': params.classifier,
'nhid': params.nhid}
clf = SplitClassifier(X={'train': trainF, 'valid': devF, 'test': testF},
y={'train': trainY, 'valid': devY, 'test': testY},
config=config)
devacc, testacc = clf.run()
logging.debug('\nDev acc : {0} Test acc : {1} for \
SICK entailment\n'.format(devacc, testacc))
return {'devacc': devacc, 'acc': testacc,
'ndev': len(devA), 'ntest': len(testA)}