in senteval/trec.py [0:0]
def run(self, params, batcher):
train_embeddings, test_embeddings = [], []
# Sort to reduce padding
sorted_corpus_train = sorted(zip(self.train['X'], self.train['y']),
key=lambda z: (len(z[0]), z[1]))
train_samples = [x for (x, y) in sorted_corpus_train]
train_labels = [y for (x, y) in sorted_corpus_train]
sorted_corpus_test = sorted(zip(self.test['X'], self.test['y']),
key=lambda z: (len(z[0]), z[1]))
test_samples = [x for (x, y) in sorted_corpus_test]
test_labels = [y for (x, y) in sorted_corpus_test]
# Get train embeddings
for ii in range(0, len(train_labels), params.batch_size):
batch = train_samples[ii:ii + params.batch_size]
embeddings = batcher(params, batch)
train_embeddings.append(embeddings)
train_embeddings = np.vstack(train_embeddings)
logging.info('Computed train embeddings')
# Get test embeddings
for ii in range(0, len(test_labels), params.batch_size):
batch = test_samples[ii:ii + params.batch_size]
embeddings = batcher(params, batch)
test_embeddings.append(embeddings)
test_embeddings = np.vstack(test_embeddings)
logging.info('Computed test embeddings')
config_classifier = {'nclasses': 6, 'seed': self.seed,
'usepytorch': params.usepytorch,
'classifier': params.classifier,
'kfold': params.kfold}
clf = KFoldClassifier({'X': train_embeddings,
'y': np.array(train_labels)},
{'X': test_embeddings,
'y': np.array(test_labels)},
config_classifier)
devacc, testacc, _ = clf.run()
logging.debug('\nDev acc : {0} Test acc : {1} \
for TREC\n'.format(devacc, testacc))
return {'devacc': devacc, 'acc': testacc,
'ndev': len(self.train['X']), 'ntest': len(self.test['X'])}