in senteval/probing.py [0:0]
def run(self, params, batcher):
    """Embed every data split with ``batcher`` and evaluate a classifier on it.

    For each split in ``self.task_data`` the examples are sorted by
    ``(len(x), y)`` so that examples of similar length share a batch, then
    passed to ``batcher`` in chunks of ``params.batch_size``. The per-batch
    outputs are stacked with ``np.vstack`` and handed, together with the
    labels, to ``SplitClassifier``.

    Note: the sort is written back, so ``self.task_data[key]['X']`` and
    ``['y']`` are reordered in place as a side effect.

    Args:
        params: Object exposing ``batch_size``, ``usepytorch`` and
            ``classifier`` (a mapping with at least an ``'nhid'`` entry).
        batcher: Callable invoked as ``batcher(params, batch)`` where
            ``batch`` is a slice of a split's ``'X'`` list; its return
            value must be stackable with ``np.vstack``.

    Returns:
        dict with keys ``'devacc'`` and ``'acc'`` (the two values returned
        by ``SplitClassifier.run()``), and ``'ndev'`` / ``'ntest'`` (number
        of rows in the stacked dev / test embeddings).

    Raises:
        ValueError: If a split contains no examples.
    """
    task_embed = {'train': {}, 'dev': {}, 'test': {}}
    bsize = params.batch_size
    logging.info('Computing embeddings for train/dev/test')
    for key in self.task_data:
        split = self.task_data[key]
        # Sort to reduce padding
        sorted_data = sorted(zip(split['X'], split['y']),
                             key=lambda z: (len(z[0]), z[1]))
        if not sorted_data:
            # Unpacking zip(*[]) below would fail with an opaque
            # "not enough values to unpack"; name the split instead.
            raise ValueError("No examples in split '%s'" % key)
        split['X'], split['y'] = map(list, zip(*sorted_data))

        batch_embeddings = [
            batcher(params, split['X'][start:start + bsize])
            for start in range(0, len(split['y']), bsize)
        ]
        task_embed[key]['X'] = np.vstack(batch_embeddings)
        task_embed[key]['y'] = np.array(split['y'])
    logging.info('Computed embeddings')

    config_classifier = {'nclasses': self.nclasses, 'seed': self.seed,
                         'usepytorch': params.usepytorch,
                         'classifier': params.classifier}

    if self.task == "WordContent" and params.classifier['nhid'] > 0:
        # Deep-copy before overriding so the caller's params.classifier
        # keeps its original 'nhid' for subsequent tasks.
        config_classifier = copy.deepcopy(config_classifier)
        config_classifier['classifier']['nhid'] = 0
        logging.debug('WordContent: overriding nhid=%s with nhid=0',
                      params.classifier['nhid'])

    clf = SplitClassifier(X={'train': task_embed['train']['X'],
                             'valid': task_embed['dev']['X'],
                             'test': task_embed['test']['X']},
                          y={'train': task_embed['train']['y'],
                             'valid': task_embed['dev']['y'],
                             'test': task_embed['test']['y']},
                          config=config_classifier)

    devacc, testacc = clf.run()
    # Lazy %-args: the message is only formatted if DEBUG is enabled.
    logging.debug('\nDev acc : %.1f Test acc : %.1f for %s classification\n',
                  devacc, testacc, self.task.upper())

    return {'devacc': devacc, 'acc': testacc,
            'ndev': len(task_embed['dev']['X']),
            'ntest': len(task_embed['test']['X'])}