def work()

in isoexp/devfair_reward_attack.py


def work(m, nb_arms, nb_features, noise, nb_simu, T, all_algs, random_state, M=1, bound_context=1, dataset=False, which=None):
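    """Run `nb_simu` simulations of horizon `T` for every algorithm in `all_algs`
    on one contextual linear bandit model (index `m`), either synthetic or built
    from the Jester / MovieLens data.  Returns the model index, per-algorithm logs
    (windowed regret, attack magnitude, target-arm draws, reward range), the model
    and the attack's target arm.  Note: `delta` and the logging `frequency` are
    module-level settings of the calling script.
    """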
    # create model
    K = nb_arms
    if dataset:
        if which == 'jester':
            arm_file = os.path.abspath("examples/jester/Vt_jester.csv")
            user_file = os.path.abspath("examples/jester/U.csv")
            model = arms.DatasetModel(arm_csvfile=arm_file, user_csvfile=user_file, noise=noise, random_state=random_state)
        else:
            arm_file = os.path.abspath('examples/movielens/Vt_movielens.csv')
            user_file = os.path.abspath('examples/movielens/U.csv')
            model = arms.DatasetModel(arm_csvfile=arm_file, user_csvfile=user_file, noise=noise, random_state=random_state, arms_limit=25)
    else:
        model = arms.RandomContextualLinearArms(n_actions=K, n_features=nb_features, noise=noise,
                                                random_state=random_state, bound_context=bound_context)
    theta_bound = np.max(np.linalg.norm(model.thetas, axis=1))
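    # Draw a target context, an alternative context and the attack's target arm uniformly at random.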
    target_context = np.random.randint(low=0, high=len(model.context_lists))
    other_context = np.random.randint(low=0, high=len(model.context_lists))
    # while other_context == target_context:
    #     other_context = np.random.randint(low=0, high=len(model.context_lists))
    target_arm = np.random.randint(low=0, high=model.n_actions)
    AAA = []
    for alg_name in tqdm(all_algs.keys(), desc='Sim. model {}'.format(m)):
        args = {'nb_arms': model.n_actions,
                'dimension': model.n_features,
                'bound_features': theta_bound,
                'bound_context': model.bound_context,
                'reg_factor': 0.1,
                'delta': delta,
                'noise_variance': noise,
                }
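        # Exp4 is run with M experts: M-2 'random' experts, one 'optimal' expert and one pinned to the target arm.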
        if 'Exp4' in alg_name:
            eta = np.sqrt(2 * np.log(M) / (T * model.n_actions))
            experts = []
            for i in range(M - 2):
                experts.append(exp(nb_arms=model.n_actions, type='random'))
            experts.append(exp(nb_arms=model.n_actions, type='optimal', m=model))
            experts.append(exp(nb_arms=model.n_actions, type='', a_star=int(target_arm)))
            args['experts'] = experts
            args['eta'] = eta
        alg = all_algs[alg_name](**args)
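        # For attacked 'gamma' variants, build a RewardAttacker whose eps is parsed from the value embedded in the algorithm name.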
        if 'attacked' in alg_name:
            if 'gamma' in alg_name:
                temp_eps = re.findall(r'[\d.]+', alg_name)
                temp_eps = np.array(list(map(float, temp_eps)))
                temp_eps = temp_eps[temp_eps <= 1]
                temp_eps = temp_eps[0]
                temp_args = args.copy()
                temp_args['eps'] = temp_eps
                attacker = RewardAttacker(**temp_args)
        # Logging arrays: one entry per window of `frequency` steps.
        regret = np.zeros((nb_simu, T // frequency))
        draws = regret.copy()
        epsilon_norm = np.zeros((nb_simu, T // frequency))
        rewards_range = np.zeros((nb_simu, T // frequency))

        for k in range(nb_simu):

            alg.reset()

            if 'attacked' in alg_name and 'stationary' not in alg_name:
                attacker.reset()

            # Per-window accumulators, flushed to the logging arrays every `frequency` steps.
            attack_accumulator = 0
            regret_accumulator = 0
            rewards_range_max = 0
            draws_accumulator = 0
            for t in trange(T):
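                # One interaction: observe the context, let the learner choose an arm, draw its true reward.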

                context = model.get_context()
                a_t = alg.get_action(context)
                r_t = model.reward(context, a_t)
                if 'attacked' in alg_name:
                    if 'stationary' not in alg_name:
                        attacker.update(context, a_t, r_t)
                        attack_t = attacker.compute_attack(a_t, context, target_arm)
                    else:
                        if a_t != target_arm:
                            attack_t = -r_t + noise * np.random.randn()
                        else:
                            attack_t = 0
                else:
                    attack_t = 0
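                # The learner only ever observes the attacked reward, clipped to [0, 1].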
                alg.update(context, a_t, min(1, max(0, r_t+attack_t)))

                attack_accumulator += np.abs(attack_t)
                regret_accumulator += model.best_arm_reward(context) - np.dot(model.thetas[a_t], context)
                rewards_range_max = max(rewards_range_max, min(1, max(r_t + attack_t, 0)))
                draws_accumulator += 1 if a_t == target_arm else 0
                if t % frequency == 0:  # logging: flush this window's statistics
                    epsilon_norm[k, t // frequency] = attack_accumulator
                    regret[k, t // frequency] = regret_accumulator
                    rewards_range[k, t // frequency] = rewards_range_max
                    draws[k, t // frequency] = draws_accumulator
                    attack_accumulator = 0
                    regret_accumulator = 0
                    rewards_range_max = 0
                    draws_accumulator = 0

        # Diagnostics that tracked alg.thetas_hat and the scalar products against
        # (1 - eps) * theta_target are disabled; empty tuples keep the result format stable.
        AAA += [(alg_name, {"regret": regret, "attack_cond": epsilon_norm, "target_draws": draws,
                            "thetas": (), "prod_scalar": (), "range_rewards": rewards_range})]

    return m, AAA, model, target_arm
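
The driver that calls work() is not part of this excerpt. Below is a minimal sketch of how it could be invoked, assuming joblib is available and that nb_models and the all_algs dictionary are defined by the calling script (both names are placeholders, not taken from the source):

# Hypothetical driver (not in this file): run `work` for several models in parallel
# and collect (model index, per-algorithm logs, model, target arm) tuples.
from joblib import Parallel, delayed

nb_models = 5  # placeholder: number of independent simulated models
results = Parallel(n_jobs=-1)(
    delayed(work)(m, nb_arms=10, nb_features=30, noise=0.1, nb_simu=20, T=10_000,
                  all_algs=all_algs, random_state=m)  # all_algs: {name: constructor}
    for m in range(nb_models)
)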