# from model/utils/classifier.py
def _extract_features(self, dataloader):
    """Run the frozen model over *dataloader*, collecting features and labels.

    The per-example feature is the max over axis 1 of the model's first
    output (after ``squeeze``); labels come straight from the dataset.

    Args:
        dataloader: yields ``(inputs, labels)`` batches.

    Returns:
        Tuple ``(X, y)`` of numpy arrays — ``X`` with shape
        ``(n_samples, n_features)`` (2-D, as scikit-learn requires),
        ``y`` with shape ``(n_samples,)``.
    """
    features, targets = [], []
    for inputs, labels in dataloader:
        inputs = inputs.to(self.device)
        # .cpu() is a no-op for CPU tensors and required for CUDA ones;
        # the original called .numpy() directly on the first batch, which
        # crashes when labels live on the GPU.
        # atleast_1d: squeeze() on a size-1 batch yields a 0-d array,
        # which np.concatenate cannot handle.
        targets.append(np.atleast_1d(labels.cpu().numpy().squeeze()))
        with torch.no_grad():
            outputs = self.model(inputs)[0].cpu().numpy().squeeze()
        # NOTE(review): assumes outputs is at least 2-D after squeeze
        # (e.g. (batch, classes, ...)); a size-1 batch would squeeze the
        # batch dim away — confirm batch sizes against callers.
        features.append(np.atleast_1d(np.max(outputs, axis=1)))
    X = np.concatenate(features, 0) if features else np.array([])
    y = np.concatenate(targets, 0) if targets else np.array([])
    # scikit-learn estimators require a 2-D (n_samples, n_features)
    # matrix; the max over axis 1 can leave X 1-D (one scalar feature
    # per example), which would raise in StandardScaler.fit.
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    return X, y

def get_score(self):
    """Fit a linear-SVM probe on model features and score it on eval data.

    Extracts features from ``self.train_dataset`` with the (frozen)
    ``self.model``, standardizes them, fits a ``LinearSVC``, then reports
    accuracy on ``self.eval_dataset`` features transformed with the
    *training* scaler.

    Returns:
        ``0`` immediately when ``self.if_use`` is falsy, otherwise the
        eval-set accuracy as a float in [0, 1].
    """
    if not self.if_use:
        return 0
    self.model.eval()

    # --- Train the probe ---
    train_dataloader = DataLoader(
        self.train_dataset,
        sampler=SequentialSampler(self.train_dataset),
        batch_size=self.batch_size,
    )
    X, y = self._extract_features(train_dataloader)
    # Normalize with statistics from the training features only.
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    clf = svm.LinearSVC(max_iter=10000, dual=False)
    clf.fit(X, y)
    # Train accuracy is computed for debugging/logging parity with the
    # original implementation; it does not affect the returned score.
    train_acc = accuracy_score(y, clf.predict(X))

    # --- Evaluate the probe ---
    eval_dataloader = DataLoader(
        self.eval_dataset,
        sampler=SequentialSampler(self.eval_dataset),
        batch_size=self.batch_size,
    )
    X, y = self._extract_features(eval_dataloader)
    # Reuse the training scaler — never refit on eval data.
    X = scaler.transform(X)
    eval_acc = accuracy_score(y, clf.predict(X))

    # Drop cached generation text and free GPU memory before returning.
    self.test_text = None
    self.real_text = None
    torch.cuda.empty_cache()
    return eval_acc