in senteval/tools/validation.py [0:0]
def run(self):
    logging.info('Training {0} with (inner) {1}-fold cross-validation'
                 .format(self.modelname, self.k))

    # Candidate regularisation strengths: weight-decay values for the PyTorch
    # MLP, inverse-regularisation C values for sklearn's LogisticRegression.
    regs = [10**t for t in range(-5, -1)] if self.usepytorch else \
           [2**t for t in range(-2, 4, 1)]
    skf = StratifiedKFold(n_splits=self.k, shuffle=True, random_state=1111)
    innerskf = StratifiedKFold(n_splits=self.k, shuffle=True,
                               random_state=1111)
    count = 0
    for train_idx, test_idx in skf.split(self.X, self.y):
        count += 1
        X_train, X_test = self.X[train_idx], self.X[test_idx]
        y_train, y_test = self.y[train_idx], self.y[test_idx]
        scores = []
        # Inner loop: pick the regularisation strength using only the outer
        # training split.
        for reg in regs:
            regscores = []
            for inner_train_idx, inner_test_idx in innerskf.split(X_train, y_train):
                X_in_train, X_in_test = X_train[inner_train_idx], X_train[inner_test_idx]
                y_in_train, y_in_test = y_train[inner_train_idx], y_train[inner_test_idx]
                if self.usepytorch:
                    clf = MLP(self.classifier_config, inputdim=self.featdim,
                              nclasses=self.nclasses, l2reg=reg,
                              seed=self.seed)
                    clf.fit(X_in_train, y_in_train,
                            validation_data=(X_in_test, y_in_test))
                else:
                    clf = LogisticRegression(C=reg, random_state=self.seed)
                    clf.fit(X_in_train, y_in_train)
                regscores.append(clf.score(X_in_test, y_in_test))
            scores.append(round(100 * np.mean(regscores), 2))
        optreg = regs[np.argmax(scores)]
        logging.info('Best param found at split {0}: l2reg = {1} with score {2}'
                     .format(count, optreg, np.max(scores)))
        self.devresults.append(np.max(scores))

        # Retrain with the selected regularisation on the full outer training
        # split, then evaluate on the held-out outer test split.
        if self.usepytorch:
            clf = MLP(self.classifier_config, inputdim=self.featdim,
                      nclasses=self.nclasses, l2reg=optreg,
                      seed=self.seed)
            clf.fit(X_train, y_train, validation_split=0.05)
        else:
            clf = LogisticRegression(C=optreg, random_state=self.seed)
            clf.fit(X_train, y_train)

        self.testresults.append(round(100 * clf.score(X_test, y_test), 2))

    devaccuracy = round(np.mean(self.devresults), 2)
    testaccuracy = round(np.mean(self.testresults), 2)
    return devaccuracy, testaccuracy
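
For reference, the same inner/outer split pattern can be reproduced outside SentEval with scikit-learn alone. The snippet below is a minimal, self-contained sketch of that nested k-fold model selection; the synthetic dataset, fold counts, and C grid are illustrative choices, not values taken from SentEval.

# Standalone sketch of the inner k-fold selection pattern above (sklearn only).
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

X, y = make_classification(n_samples=300, n_features=20, random_state=0)
regs = [2**t for t in range(-2, 4)]          # candidate C values (illustrative)
outer = StratifiedKFold(n_splits=5, shuffle=True, random_state=1111)
inner = StratifiedKFold(n_splits=5, shuffle=True, random_state=1111)

dev_scores, test_scores = [], []
for train_idx, test_idx in outer.split(X, y):
    X_tr, X_te = X[train_idx], X[test_idx]
    y_tr, y_te = y[train_idx], y[test_idx]
    # Inner loop: choose C on the outer training split only.
    mean_scores = []
    for reg in regs:
        fold_scores = []
        for in_tr, in_te in inner.split(X_tr, y_tr):
            clf = LogisticRegression(C=reg, max_iter=1000).fit(X_tr[in_tr], y_tr[in_tr])
            fold_scores.append(clf.score(X_tr[in_te], y_tr[in_te]))
        mean_scores.append(np.mean(fold_scores))
    best_reg = regs[int(np.argmax(mean_scores))]
    dev_scores.append(max(mean_scores))
    # Retrain on the full outer training split, score on the held-out fold.
    clf = LogisticRegression(C=best_reg, max_iter=1000).fit(X_tr, y_tr)
    test_scores.append(clf.score(X_te, y_te))

print(np.mean(dev_scores), np.mean(test_scores))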