in isoexp/mab/contextual_mab_algs.py [0:0]
def get_action(self, context, deterministic=False):
    """Select an arm for ``context`` using an epsilon-greedy rule.

    While ``self.nb_iter < self.K`` the method returns ``self.nb_iter``
    unchanged, so the first calls hand out arm indices in order. After
    that, the per-arm scores ``self.thetas.dot(context)`` are computed and
    either a uniformly random arm (exploration) or one of the
    highest-scoring arms (exploitation) is returned.

    Args:
        context: Vector passed to ``self.thetas.dot``. Presumably a 1-D
            feature vector with ``self.thetas`` shaped ``(K, d)`` -- TODO
            confirm against the code that builds ``self.thetas``.
        deterministic: When true, the exploration branch is never taken
            and the arm is picked among the maximisers only. Ties between
            maximisers are still broken at random.

    Returns:
        The index of the selected arm. Outside the initial
        ``nb_iter < K`` phase this is always a built-in ``int``.
    """
    # Initial phase: return the iteration counter itself as the arm index.
    if self.nb_iter < self.K:
        return self.nb_iter

    expected_rewards = self.thetas.dot(context)

    # The uniform sample is drawn unconditionally (even when
    # ``deterministic`` is set) so the global NumPy RNG stream is consumed
    # in the same order regardless of the flag.
    rnd = np.random.rand()

    # With ``decrease_epsilon`` the exploration probability shrinks as
    # epsilon / sqrt(nb_iter + 1); otherwise it stays at epsilon.
    scale = math.sqrt(self.nb_iter + 1) if self.decrease_epsilon else 1
    if not deterministic and rnd <= self.epsilon / scale:
        return int(np.random.choice(self.K))

    # Collect every arm whose score is (numerically) equal to the maximum
    # and break ties uniformly at random.
    idxs = np.flatnonzero(np.isclose(expected_rewards, expected_rewards.max()))
    # ``np.asscalar`` no longer exists in current NumPy releases; ``int``
    # performs the same NumPy-scalar -> Python-scalar conversion.
    return int(np.random.choice(idxs))