def get_action()

in isoexp/mab/contextual_mab_algs.py [0:0]

12 lines of code
5 McCabe index (conditional complexity)


    def get_action(self, context, deterministic=False):
        """Select the arm to play for ``context`` with an epsilon-greedy rule.

        While ``self.nb_iter < self.K`` the current iteration counter is
        returned as the arm index (presumably a warm-up so every arm is
        tried once; this relies on the caller advancing ``nb_iter`` --
        TODO confirm).  Afterwards the per-arm scores
        ``self.thetas.dot(context)`` are computed and either a uniformly
        random arm (exploration) or one of the highest-scoring arms
        (exploitation, ties broken uniformly at random) is returned.

        Args:
            context: Feature vector passed to ``self.thetas.dot``.
                Assumes ``thetas`` has one row per arm, i.e. shape
                ``(K, d)`` with ``context`` of shape ``(d,)`` -- TODO confirm.
            deterministic: When true, exploration is skipped and the
                greedy arm is always chosen (ties are still broken at
                random).

        Returns:
            Index of the chosen arm. After the warm-up phase this is
            always a plain Python ``int``; during warm-up it is
            ``self.nb_iter`` unchanged.
        """
        if self.nb_iter < self.K:
            return self.nb_iter

        # Score every arm for this context.
        expected_rewards = self.thetas.dot(context)

        # Drawn unconditionally (even in deterministic mode) so that the
        # global NumPy RNG stream is consumed exactly as it always was.
        rnd = np.random.rand()

        if not deterministic:
            # Exploration probability is epsilon, optionally decayed by
            # 1 / sqrt(nb_iter + 1) when decrease_epsilon is set.
            scale = math.sqrt(self.nb_iter + 1) if self.decrease_epsilon else 1
            if rnd <= self.epsilon / scale:
                return int(np.random.choice(self.K))

        # Greedy choice: collect all arms whose score is (numerically) equal
        # to the maximum and pick one of them uniformly at random.
        idxs = np.flatnonzero(np.isclose(expected_rewards, expected_rewards.max()))
        # np.asscalar was deprecated in NumPy 1.16 and removed in 1.23;
        # int() yields the same plain Python integer.
        return int(np.random.choice(idxs))