def BootstrapedUCB()

in isoexp/mab/smab_algs.py [0:0]


def BootstrapedUCB(T, MAB, delta=0.1, b_rep=200):
    """Run a bootstrapped-UCB bandit strategy for ``T`` rounds.

    Each arm is pulled once (in index order) to initialise its statistics;
    afterwards, at every round the arm with the largest upper confidence
    bound is pulled, ties being broken uniformly at random.

    Parameters
    ----------
    T : int
        Number of rounds to play. If ``T`` is smaller than the number of
        arms, only the first ``T`` arms are pulled (once each).
    MAB : sequence
        The arms. Each arm must expose ``sample()`` returning an object
        with a ``squeeze()`` method that yields a scalar reward.
    delta : float, optional
        Enters the level ``(1 - delta) * alpha`` passed to ``quantile``.
    b_rep : int, optional
        Forwarded to ``quantile`` as ``B`` (presumably the number of
        bootstrap replications -- confirm against ``quantile``).

    Returns
    -------
    rewards : numpy.ndarray, shape (T,)
        Reward observed at each round.
    draws : numpy.ndarray, shape (T,)
        Index of the arm pulled at each round (stored as floats).
    """
    K = len(MAB)
    rewards = np.zeros((T,))
    draws = np.zeros((T,))

    N = np.zeros((K,))  # number of pulls per arm
    S = np.zeros((K,))  # cumulated reward per arm
    rewards_arm = {}    # arm index -> list of rewards observed on that arm

    # Initialisation: one pull per arm. Bounded by T so that a horizon
    # shorter than the number of arms does not index past the output arrays.
    for k in range(min(K, T)):
        r = 1*MAB[k].sample().squeeze()

        rewards[k] = r
        draws[k] = k
        rewards_arm[k] = [r]
        S[k] += r
        N[k] += 1

    for t in range(K, T):
        alpha = 1/(t+1)
        # NOTE(review): `quantile` is defined elsewhere in this module; it is
        # assumed to return one value per arm -- confirm.
        bootstrap_quantile = quantile((1-delta)*alpha, S, N, rewards_arm, B=b_rep)
        phi = np.sqrt(2*np.log(1/alpha)/N)
        ## Theoretical ucb
        #ucb = S / N   + (bootstrap_quantile + np.sqrt(np.log(2/(delta*alpha))/N)*phi)
        ## Ucb used in practice
        ucb = S / N + (bootstrap_quantile + np.sqrt(1/N)*phi)

        # Break ties between (numerically) maximal indices at random.
        idxs = np.flatnonzero(np.isclose(ucb, ucb.max()))
        # int(...) replaces np.asscalar, which was removed from NumPy.
        a = int(np.random.choice(idxs))

        r = 1*MAB[a].sample().squeeze()
        rewards[t] = r
        draws[t] = a
        rewards_arm[a].append(r)
        S[a] += r
        N[a] += 1

    return rewards, draws