in senteval/snli.py [0:0]
def run(self, params, batcher):
    """Encode every split in ``self.data`` and train/evaluate a classifier.

    For each split, the two sentence lists are encoded batch by batch with
    ``batcher``; the two encodings ``u`` and ``v`` of a batch are combined
    into the feature block ``[u, v, u * v, |u - v|]`` (stacked
    horizontally), and all blocks of a split are stacked vertically. The
    features and integer labels are stored in ``self.X`` / ``self.y``
    (keyed by split name) and handed to ``SplitClassifier``.

    Args:
        params: Settings object. This method reads ``batch_size``,
            ``usepytorch``, ``nhid`` and ``classifier`` from it, and
            forwards the object unchanged to ``batcher``.
        batcher: Callable invoked as ``batcher(params, batch)`` on a slice
            of one sentence list. Presumably returns a 2-D array with one
            row per sentence -- TODO confirm against the caller.

    Returns:
        dict with the keys ``'devacc'`` and ``'acc'`` (the two values
        returned by ``SplitClassifier.run()``), plus ``'ndev'`` and
        ``'ntest'`` (lengths of the first sentence list of the ``'valid'``
        and ``'test'`` splits).

    Raises:
        KeyError: if a label is not one of ``'entailment'``, ``'neutral'``
            or ``'contradiction'``, or if ``self.data`` lacks the
            ``'valid'`` / ``'test'`` splits.
    """
    # Start from clean containers; each split is filled exactly once below.
    self.X, self.y = {}, {}
    dico_label = {'entailment': 0, 'neutral': 1, 'contradiction': 2}
    batch_size = params.batch_size

    for key in self.data:
        input1, input2, mylabels = self.data[key]
        enc_input = []
        n_labels = len(mylabels)
        for ii in range(0, n_labels, batch_size):
            batch1 = input1[ii:ii + batch_size]
            batch2 = input2[ii:ii + batch_size]

            # NOTE(review): a batch whose two sides differ in length (or
            # are empty) is skipped silently, while its labels are still
            # kept below -- this assumes both sentence lists are as long
            # as ``mylabels``; confirm against the data loader.
            if len(batch1) == len(batch2) and len(batch1) > 0:
                enc1 = batcher(params, batch1)
                enc2 = batcher(params, batch2)
                enc_input.append(np.hstack((enc1, enc2, enc1 * enc2,
                                            np.abs(enc1 - enc2))))
            # Report progress whenever the running offset hits a multiple
            # of 20000 (same condition as scaling both sides of the modulo
            # by the batch size, which is a positive integer here).
            if ii % 20000 == 0:
                logging.info("PROGRESS (encoding): %.2f%%",
                             100 * ii / n_labels)
        self.X[key] = np.vstack(enc_input)
        self.y[key] = [dico_label[y] for y in mylabels]

    config = {'nclasses': 3, 'seed': self.seed,
              'usepytorch': params.usepytorch,
              'cudaEfficient': True,
              'nhid': params.nhid, 'noreg': True}

    # Work on a copy so the caller's classifier settings are not mutated
    # by the overrides below.
    config_classifier = copy.deepcopy(params.classifier)
    config_classifier['max_epoch'] = 15
    config_classifier['epoch_size'] = 1
    config['classifier'] = config_classifier

    clf = SplitClassifier(self.X, self.y, config)
    devacc, testacc = clf.run()
    logging.debug('Dev acc : {0} Test acc : {1} for SNLI\n'
                  .format(devacc, testacc))
    return {'devacc': devacc, 'acc': testacc,
            'ndev': len(self.data['valid'][0]),
            'ntest': len(self.data['test'][0])}