in aepsych/benchmark/problem.py
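# Module-level imports assumed by this excerpt (the surrounding file is not
# shown; the names below are inferred from how they are used in the method):
#   from typing import Dict
#   import aepsych.strategy
#   import numpy as np
#   from scipy.stats import norm, pearsonr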
def evaluate(self, strat: aepsych.strategy.SequentialStrategy) -> Dict[str, float]:
    """Evaluate the strategy with respect to this problem.

    Extend this in subclasses to add additional metrics.

    Args:
        strat (aepsych.strategy.SequentialStrategy): Strategy to evaluate.

    Returns:
        Dict[str, float]: A dictionary containing metrics and their values.
    """
    # Always evaluate error in the latent function f; f_hat is the strategy's
    # posterior-mean estimate of f on the evaluation grid.
    f_true = self.f_true().numpy()
    f_hat = self.f_hat(strat).detach().numpy()
    assert (
        f_true.shape == f_hat.shape
    ), f"f_true.shape=={f_true.shape} != f_hat.shape=={f_hat.shape}"
    # Also evaluate on the probability scale, p = norm.cdf(f).
    p_true = norm.cdf(f_true)
    p_hat = norm.cdf(f_hat)
    mae_f = np.mean(np.abs(f_true - f_hat))
    mse_f = np.mean((f_true - f_hat) ** 2)
    max_abs_err_f = np.max(np.abs(f_true - f_hat))
    corr_f = pearsonr(f_true.flatten(), f_hat.flatten())[0]
    mae_p = np.mean(np.abs(p_true - p_hat))
    mse_p = np.mean((p_true - p_hat) ** 2)
    max_abs_err_p = np.max(np.abs(p_true - p_hat))
    corr_p = pearsonr(p_true.flatten(), p_hat.flatten())[0]
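    # Illustrative check (not from the source): with f_true = [0., 1.] and
    # f_hat = [0.5, 0.5], mean_abs_err_f is 0.5 and mean_square_err_f is 0.25;
    # the *_p variants apply the same formulas after mapping through norm.cdf.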
    # Also evaluate a sample-based expectation over the posterior (integrated
    # errors), rather than only the error of the posterior mean.
    fsamps = strat.sample(self.eval_grid, num_samples=1000).detach().numpy()
    ferrs = fsamps - f_true[None, :]
    miae_f = np.mean(np.abs(ferrs))
    mise_f = np.mean(ferrs ** 2)
    perrs = norm.cdf(fsamps) - norm.cdf(f_true[None, :])
    miae_p = np.mean(np.abs(perrs))
    mise_p = np.mean(perrs ** 2)
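    # fsamps has shape (num_samples, n_grid), as implied by the broadcast
    # against f_true[None, :], so the means above average over both posterior
    # draws and grid points: a Monte Carlo estimate of the posterior
    # expectation of the integrated absolute and squared errors.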
    metrics = {
        "mean_abs_err_f": mae_f,
        "mean_integrated_abs_err_f": miae_f,
        "mean_square_err_f": mse_f,
        "mean_integrated_square_err_f": mise_f,
        "max_abs_err_f": max_abs_err_f,
        "pearson_corr_f": corr_f,
        "mean_abs_err_p": mae_p,
        "mean_integrated_abs_err_p": miae_p,
        "mean_square_err_p": mse_p,
        "mean_integrated_square_err_p": mise_p,
        "max_abs_err_p": max_abs_err_p,
        "pearson_corr_p": corr_p,
    }
    return metrics
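# Hedged usage sketch (not part of the source file): how evaluate() might be
# called once a benchmark run has finished. `problem` stands for any concrete
# subclass of this class that defines f_true, f_hat, and eval_grid, and
# `strat` for a SequentialStrategy that has already been fit on the problem's
# trials; both names are placeholders, not aepsych API guarantees.
#
#   metrics = problem.evaluate(strat)
#   print(f"MAE(p) = {metrics['mean_abs_err_p']:.3f}, "
#         f"corr(f) = {metrics['pearson_corr_f']:.3f}")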