# isoexp/linear/linearbandit.py [0:0]
def work(alg_name, alg):
    """Run `nb_simu` independent simulations of one bandit algorithm.

    Parameters
    ----------
    alg_name : str
        Label shown in the progress bar and returned unchanged.
    alg : object
        Bandit algorithm exposing ``reset()``, ``get_action()`` and
        ``update(action, reward)``.  If it also exposes a ``theta_hat``
        attribute, the parameter-estimation error is recorded as well.

    Returns
    -------
    tuple
        ``(alg_name, MABResults)`` with ``regret`` and ``norm_error``
        arrays of shape ``(nb_simu, T)``.

    Notes
    -----
    Relies on the module-level ``model``, ``link``, ``nb_simu`` and ``T``.
    """
    regret = np.zeros((nb_simu, T))
    norms = np.zeros((nb_simu, T))
    for sim in trange(nb_simu, desc='Simulating {}'.format(alg_name)):
        alg.reset()
        for step in trange(T, desc='Current episode :', leave=True):
            action = alg.get_action()
            reward = model.reward(action)
            alg.update(action, reward)
            # Instantaneous (pseudo-)regret: best expected reward minus the
            # expected reward of the played arm, both through the link function.
            played_value = link(np.dot(model.theta, model.features[action]))
            regret[sim, step] = link(model.best_arm_reward()) - played_value
            # Estimation error is only meaningful for parametric algorithms
            # that maintain an estimate of theta.
            if hasattr(alg, 'theta_hat'):
                norms[sim, step] = np.linalg.norm(alg.theta_hat - model.theta, 2)
    return alg_name, MABResults(regret=regret, norm_error=norms)