in senteval/tools/validation.py [0:0]
def run(self):
    # cross-validation
    logging.info('Training {0} with {1}-fold cross-validation'
                 .format(self.modelname, self.k))
    regs = [10**t for t in range(-5, -1)] if self.usepytorch else \
           [2**t for t in range(-1, 6, 1)]
    skf = StratifiedKFold(n_splits=self.k, shuffle=True,
                          random_state=self.seed)
    scores = []

    for reg in regs:
        scanscores = []
        for train_idx, test_idx in skf.split(self.train['X'],
                                             self.train['y']):
            # Split data
            X_train, y_train = self.train['X'][train_idx], self.train['y'][train_idx]
            X_test, y_test = self.train['X'][test_idx], self.train['y'][test_idx]

            # Train classifier
            if self.usepytorch:
                clf = MLP(self.classifier_config, inputdim=self.featdim,
                          nclasses=self.nclasses, l2reg=reg,
                          seed=self.seed)
                clf.fit(X_train, y_train, validation_data=(X_test, y_test))
            else:
                clf = LogisticRegression(C=reg, random_state=self.seed)
                clf.fit(X_train, y_train)
            score = clf.score(X_test, y_test)
            scanscores.append(score)
        # Append mean score
        scores.append(round(100 * np.mean(scanscores), 2))

    # evaluation
    logging.info([('reg:' + str(regs[idx]), scores[idx])
                  for idx in range(len(scores))])
    optreg = regs[np.argmax(scores)]
    devaccuracy = np.max(scores)
    logging.info('Cross-validation : best param found is reg = {0} '
                 'with score {1}'.format(optreg, devaccuracy))
    logging.info('Evaluating...')
    if self.usepytorch:
        clf = MLP(self.classifier_config, inputdim=self.featdim,
                  nclasses=self.nclasses, l2reg=optreg,
                  seed=self.seed)
        clf.fit(self.train['X'], self.train['y'], validation_split=0.05)
    else:
        clf = LogisticRegression(C=optreg, random_state=self.seed)
        clf.fit(self.train['X'], self.train['y'])
    yhat = clf.predict(self.test['X'])

    testaccuracy = clf.score(self.test['X'], self.test['y'])
    testaccuracy = round(100 * testaccuracy, 2)

    return devaccuracy, testaccuracy, yhat
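
For reference, the scikit-learn branch of this sweep can be reproduced in isolation. The sketch below is a minimal, self-contained version assuming plain NumPy feature/label arrays; the function name sweep_logreg, its arguments, and the default seed are illustrative and not part of SentEval.

# Hypothetical standalone sketch (not SentEval code): grid-search C with
# stratified k-fold CV, refit the best value, and report dev/test accuracy.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

def sweep_logreg(X_train, y_train, X_test, y_test, n_folds=5, seed=1111):
    regs = [2**t for t in range(-1, 6)]          # same grid as the sklearn branch above
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
    scores = []
    for reg in regs:
        fold_scores = []
        for tr_idx, te_idx in skf.split(X_train, y_train):
            clf = LogisticRegression(C=reg, random_state=seed)
            clf.fit(X_train[tr_idx], y_train[tr_idx])
            fold_scores.append(clf.score(X_train[te_idx], y_train[te_idx]))
        scores.append(round(100 * np.mean(fold_scores), 2))
    optreg = regs[int(np.argmax(scores))]        # best C on the held-out folds
    clf = LogisticRegression(C=optreg, random_state=seed)
    clf.fit(X_train, y_train)                    # refit on the full training set
    devacc = max(scores)
    testacc = round(100 * clf.score(X_test, y_test), 2)
    return devacc, testacc, clf.predict(X_test)

Note that, as in the original, fold accuracies are averaged and rounded to two decimals before np.argmax, so ties between regularization values resolve to the first (smallest) entry in the grid.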