def select_variant()

in lambda/api/algorithm.py [0:0]


    def select_variant(self):
        """
        Select a variant using the UCB1 multi-armed bandit algorithm.

        The defining feature of UCB1 is its "curiosity bonus". When selecting
        an arm, it takes the observed mean reward of each arm and adds a bonus
        that shrinks as the arm is invoked more often, i.e. it is calculated
        in inverse proportion to the confidence in that reward estimate.
        The algorithm is optimistic about uncertainty, so lower confidence
        arms are given a bit of a boost relative to higher confidence arms.

        Each entry of ``self.variant_metrics`` is read through the keys
        ``"variant_name"``, ``"invocation_count"`` and ``"reward_sum"``.

        Returns:
            The ``"variant_name"`` of the selected entry. A variant whose
            ``"invocation_count"`` is 0 is returned before any scored one
            (the first such entry in list order); otherwise the entry picked
            by ``AlgorithmBase.argmax`` over the UCB1 scores is returned.
        """
        # UCB1 begins by playing every arm once. An untried arm has no mean
        # reward and an unbounded bonus, and scoring it would divide by zero
        # (or take log(0) when nothing has been invoked yet), so pick it now.
        for v in self.variant_metrics:
            if v["invocation_count"] == 0:
                return v["variant_name"]

        invocation_total = sum(v["invocation_count"] for v in self.variant_metrics)
        # Numerator of the bonus term; identical for every arm, so compute once.
        log_term = 2 * math.log(invocation_total)

        ucb_values = []
        for v in self.variant_metrics:
            count = float(v["invocation_count"])
            curiosity_bonus = math.sqrt(log_term / count)
            rate = v["reward_sum"] / count
            ucb_values.append(rate + curiosity_bonus)
        variant_index = AlgorithmBase.argmax(ucb_values)
        return self.variant_metrics[variant_index]["variant_name"]