in train.py [0:0]
def evaluate(model, data_loader, task):
    """Evaluate a trained model on a dataset and return task metrics.

    Args:
        model: a torch.nn.Module whose forward takes one batch (either a
            Sequence of tensors or a dict of tensors) and returns predictions.
        data_loader: iterable of batches. Each batch is either a Sequence whose
            last element is the label tensor, or a mapping with a "labels" key;
            the remaining entries are the model inputs.
        task: one of "cc", "bp", "mf" (multi-label classification, scored with
            protein-centric F_max and micro-AUPR) or any other string
            (single-task regression, scored with MSE/RMSE/R^2/Spearman rho).

    Returns:
        dict mapping metric name to float score: {"f_max", "aupr"} for the
        multi-label tasks, {"mse", "rmse", "r2", "rho"} for regression.
    """
    # Fall back to CPU so evaluation also runs on machines without a GPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()
    y_preds = []
    y_true = []
    with torch.no_grad():
        for batch in data_loader:
            if isinstance(batch, Sequence):
                # Tuple/list batch: by convention the labels come last.
                y_true.append(batch[-1])
                batch = [b.to(device) for b in batch]
            else:
                # Dict-style batch with an explicit "labels" entry.
                y_true.append(batch["labels"])
                batch = {key: val.to(device) for key, val in batch.items()}
            y_pred = model(batch)
            if y_pred.ndim == 1:
                # Normalize to 2-D so vstack yields (n_samples, n_outputs).
                y_pred = y_pred.unsqueeze(1)
            y_preds.append(y_pred.cpu())
    y_preds = torch.vstack(y_preds).numpy()
    y_true = torch.vstack(y_true).numpy()
    print(y_preds.shape, y_true.shape)
    if task in ("cc", "bp", "mf"):
        # Multi-label classification. y_true/y_preds are already numpy arrays
        # here (converted above) — the previous extra .numpy() calls raised
        # AttributeError on ndarray.
        f_max, micro_aupr = deepfrier_utils.evaluate_multilabel(y_true, y_preds)
        scores = {"f_max": f_max, "aupr": micro_aupr}
        print("F_max = {:1.3f}".format(scores["f_max"]))
        print("AUPR = {:1.3f}".format(scores["aupr"]))
    else:
        # Single-task regression. Metrics are computed directly with
        # numpy/scipy; for single-output targets these match sklearn's
        # mean_squared_error and r2_score exactly.
        residual = y_true - y_preds
        mse = float(np.mean(residual ** 2))
        rmse = float(np.sqrt(mse))
        ss_res = float(np.sum(residual ** 2))
        ss_tot = float(np.sum((y_true - y_true.mean()) ** 2))
        r2 = 1.0 - ss_res / ss_tot
        rho, _ = stats.spearmanr(y_true, y_preds)
        scores = {"mse": mse, "rmse": rmse, "r2": r2, "rho": float(rho)}
        for key, score in scores.items():
            print("{} = {:1.3f}".format(key, score))
    return scores