in lambda/api/algorithm.py [0:0]
def select_variant(self):
    """
    Select the next variant to serve using the UCB1 bandit algorithm.

    UCB1 scores each arm as its observed mean reward plus a "curiosity
    bonus" of ``sqrt(2 * ln(total_invocations) / arm_invocations)``. The
    bonus shrinks as an arm accumulates invocations, so the algorithm is
    optimistic about uncertainty: lower-confidence arms are given a bit of
    a boost relative to higher-confidence arms.

    An arm that has never been invoked has no mean reward and an undefined
    (infinite) bonus, so the first such arm is returned immediately. This
    is the standard UCB1 initialisation step (play every arm once) and it
    avoids dividing by zero / taking ``log(0)`` on a cold start.

    Reads ``self.variant_metrics``: an iterable of mappings with the keys
    ``"variant_name"``, ``"invocation_count"`` and ``"reward_sum"``.

    Returns:
        The ``"variant_name"`` of the selected entry.
    """
    # Initialisation phase: any unexplored arm wins outright.
    for v in self.variant_metrics:
        if v["invocation_count"] == 0:
            return v["variant_name"]

    invocation_total = sum(v["invocation_count"] for v in self.variant_metrics)
    ucb_values = []
    for v in self.variant_metrics:
        # Convert once so both the mean and the bonus use float division.
        count = float(v["invocation_count"])
        # math.log stays inside the loop on purpose: with no variants at
        # all it is never evaluated, so log(0) cannot be raised here.
        curiosity_bonus = math.sqrt((2 * math.log(invocation_total)) / count)
        rate = float(v["reward_sum"]) / count
        ucb_values.append(rate + curiosity_bonus)
    # Tie-breaking between equal scores is delegated to AlgorithmBase.argmax.
    variant_index = AlgorithmBase.argmax(ucb_values)
    return self.variant_metrics[variant_index]["variant_name"]