def SoftMAB()

in isoexp/mab/smab_algs.py [0:0]
24 lines of code
4 McCabe index (conditional complexity)

def SoftMAB(T, MAB, temp=1.0):
    """
    Play a multi-armed bandit for T steps with a softmax (Boltzmann) policy.

    Every arm is first pulled once, in index order (truncated to the first T
    arms when T is smaller than the number of arms). At each remaining step
    an arm is drawn at random with probability proportional to
    ``exp(estimate / temp)``, where ``estimate`` is the arm's reward sum
    divided by its counter.

    Args:
        T (int): horizon
        MAB (list): list of available MAB models; each one must provide a
            ``sample()`` method returning a scalar reward
        temp (float): softmax temperature dividing the estimates before
            exponentiation; smaller positive values concentrate the
            probability mass on the arms with the highest estimates
    Returns:
        rewards (array-like): observed rewards
        draws (array-like): indexes of selected arms
    """
    K = len(MAB)
    rewards = np.zeros((T,))
    draws = np.zeros((T,))

    # NOTE: the counters start at one and are incremented on every pull, so
    # N[a] holds (number of pulls of arm a) + 1 and S / N is shrunk toward
    # zero compared with the plain empirical mean. Kept as-is on purpose:
    # changing it would alter the results seen by existing callers.
    N = np.ones((K,))  # per-arm counter (pulls + 1)
    S = np.zeros((K,))  # sum of rewards for each arm

    # Initial round-robin: one pull per arm, never exceeding the horizon so
    # that rewards/draws are not indexed out of bounds when T < K.
    n_init = min(K, T)
    for a in range(n_init):
        r = MAB[a].sample()

        # update quantities
        rewards[a] = r
        draws[a] = a
        S[a] += r
        N[a] += 1

    for t in range(n_init, T):
        # select the arm
        logits = (S / N) / temp
        # Softmax is invariant to shifting all logits by a constant;
        # subtracting the maximum keeps np.exp from overflowing to inf
        # (which would turn the probabilities into NaN).
        logits = logits - np.max(logits)

        proba = np.exp(logits)
        proba = proba / np.sum(proba)
        a = np.random.choice(K, p=proba)

        r = MAB[a].sample()

        # update quantities
        rewards[t] = r
        draws[t] = a
        S[a] += r
        N[a] += 1

    return rewards, draws