in aepsych/benchmark/problem.py [0:0]
def evaluate(self, strat: aepsych.strategy.SequentialStrategy) -> Dict[str, float]:
    """Evaluate the strategy with respect to this problem.

    On top of the parent-class metrics, this computes threshold-contour
    error metrics: mean/max absolute error, mean squared error, Pearson
    correlation (for dim > 1), and integrated errors over posterior
    samples.

    Args:
        strat (aepsych.strategy.SequentialStrategy): Strategy to evaluate.
    Returns:
        Dict[str, float]: A dictionary containing metrics and their values,
        including parent class metrics.
    """
    metrics = super().evaluate(strat)

    assert (
        strat.has_model
    ), "Can only evaluate a strat that has an underlying model!"

    level = self.options.get("thresh", 0.75)
    n_grid = self.options.get("gridsize", 10)
    dim = self.eval_grid.shape[1]

    # Posterior mean over the evaluation grid, reshaped to a dense
    # (n_grid, ..., n_grid) cube and pushed through the probit link.
    mean_f, _ = strat.predict(self.eval_grid)
    p_hat = norm.cdf(mean_f.reshape((n_grid,) * dim).detach().numpy())

    # assume mono_dim is last dim (TODO make this better)
    mono_grid = dim_grid(
        lower=strat.lb.numpy()[-1],
        upper=strat.ub.numpy()[-1],
        dim=1,
        gridsize=n_grid,
    ).squeeze()

    # Estimated vs. ground-truth contour of the `level` isoline along the
    # (assumed monotone) last dimension.
    est_contour = get_lse_contour(p_hat, mono_grid, level=level, lb=-1.0, ub=1.0)
    true_p = norm.cdf(self.f(self.eval_grid).reshape((n_grid,) * dim))
    gt_contour = get_lse_contour(true_p, mono_grid, level=level, lb=-1.0, ub=1.0)

    assert est_contour.shape == gt_contour.shape, (
        f"x2_hat.shape != true_x2.shape, something went wrong!x2_hat.shape={est_contour.shape}, true_x2.shape={gt_contour.shape}"
    )

    resid = gt_contour - est_contour
    metrics["mean_abs_err_thresh"] = np.mean(np.abs(resid))
    metrics["mean_square_err_thresh"] = np.mean(resid ** 2)
    metrics["max_abs_err_thresh"] = np.max(np.abs(resid))
    if dim != 1:
        # Correlation between true and estimated contours, flattened.
        metrics["pearson_corr_thresh"] = pearsonr(
            gt_contour.flatten(), est_contour.flatten()
        )[0]

    # now construct integrated error on thresh
    draws = strat.sample(self.eval_grid, num_samples=1000).detach().numpy()
    sample_contours = np.stack(
        [
            get_lse_contour(
                norm.cdf(draw.reshape((n_grid,) * strat.model.dim)),
                mono_grid,
                level=level,
                mono_dim=-1,
                lb=-1,
                ub=1,
            )
            for draw in draws
        ]
    )
    sample_err = sample_contours - gt_contour[None, :]
    metrics["mean_integrated_abs_err_thresh"] = np.mean(np.abs(sample_err))
    metrics["mean_integrated_square_err_thresh"] = np.mean(sample_err ** 2)
    return metrics