in isoexp/devfair_reward_attack.py [0:0]
def work(m, nb_arms, nb_features, noise, nb_simu, T, all_algs, random_state, M=1, bound_context=1, dataset=False, which=None):
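"""Simulate reward-poisoning attacks on contextual linear bandit algorithms.

Builds either a synthetic linear model or a dataset-based one (Jester / MovieLens),
draws a random target arm, then runs every algorithm in `all_algs` for `nb_simu`
independent simulations of `T` rounds. Algorithms whose name contains 'attacked'
receive rewards perturbed by a RewardAttacker (or by an oracle attack for the
'stationary' variants) so as to promote the target arm; the perturbed reward is
clipped to [0, 1] before being fed to the learner.

Returns (m, results, model, target_arm), where `results` is a list of
(algorithm name, dict of logged regret, cumulative attack magnitude,
target-arm draws and reward range).
"""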
# create model
K = nb_arms
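# dataset=True: use real features (Jester or MovieLens); otherwise draw a random linear model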
if dataset:
if which == 'jester':
arm_file = os.path.abspath("examples/jester/Vt_jester.csv")
user_file = os.path.abspath("examples/jester/U.csv")
model = arms.DatasetModel(arm_csvfile=arm_file, user_csvfile=user_file, noise=noise, random_state=random_state)
else:
arm_file = os.path.abspath('examples/movielens/Vt_movielens.csv')
user_file = os.path.abspath('examples/movielens/U.csv')
model = arms.DatasetModel(arm_csvfile=arm_file, user_csvfile=user_file, noise=noise, random_state=random_state, arms_limit=25)
else:
model = arms.RandomContextualLinearArms(n_actions=K, n_features=nb_features, noise=noise,
random_state=random_state, bound_context=bound_context)
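# largest norm among the true arm parameters, passed to the learners as bound_features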
theta_bound = np.max(np.linalg.norm(model.thetas, axis=1))
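# random target arm (and contexts) defining the attack's objective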
target_context = np.random.randint(low=0, high=len(model.context_lists))
other_context = np.random.randint(low=0, high=len(model.context_lists))
# while other_context == target_context:
# other_context = np.random.randint(low=0, high=len(model.context_lists))
target_arm = np.random.randint(low=0, high=model.n_actions)
results = []  # (algorithm name, logs) for every algorithm
for alg_name in tqdm(all_algs.keys(), desc='Sim. model {}'.format(m)):
args = {'nb_arms': model.n_actions,
'dimension': model.n_features,
'bound_features': theta_bound,
'bound_context': model.bound_context,
'reg_factor': 0.1,
'delta': delta,  # confidence level; assumed to be a module-level constant in this script
'noise_variance': noise,
}
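# Exp4 additionally needs an expert set and a learning rate eta = sqrt(2 ln(M) / (T * K))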
if 'Exp4' in alg_name:
eta = np.sqrt(2 * np.log(M) / (T * model.n_actions))
experts = []
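# M - 2 uniformly random experts, one optimal expert, and one expert parameterized with the target arm as a_star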
for i in range(M - 2):
experts.append(exp(nb_arms=model.n_actions, type='random'))
experts.append(exp(nb_arms=model.n_actions, type='optimal', m=model))
experts.append(exp(nb_arms=model.n_actions, type='', a_star=int(target_arm)))
args['experts'] = experts
args['eta'] = eta
alg = all_algs[alg_name](**args)
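# for attacked algorithms, instantiate the reward attacker; the attack parameter eps is read from algorithm names containing 'gamma'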
if 'attacked' in alg_name:
if 'gamma' in alg_name:
# recover the attack strength eps from the algorithm name (first number <= 1 appearing in it)
temp_eps = re.findall(r'[\d.]+', alg_name)
temp_eps = np.array(list(map(float, temp_eps)))
temp_eps = temp_eps[temp_eps <= 1][0]
temp_args = args.copy()
temp_args['eps'] = temp_eps
attacker = RewardAttacker(**temp_args)
# logs aggregated every `frequency` rounds (frequency is assumed to be a module-level constant)
regret = np.zeros((nb_simu, T // frequency))
draws = regret.copy()
epsilon_norm = np.zeros((nb_simu, T // frequency))
rewards_range = np.zeros((nb_simu, T // frequency))
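# independent runs: reset the learner (and attacker when relevant), then play T rounds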
for k in range(nb_simu):
alg.reset()
if 'attacked' in alg_name and 'stationary' not in alg_name:
attacker.reset()
# running sums over the current logging window (reset after each log)
attack_accumulator = 0
regret_accumulator = 0
rewards_range_max = 0
draws_accumulator = 0
for t in trange(T):
context = model.get_context()
a_t = alg.get_action(context)
r_t = model.reward(context, a_t)
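# reward perturbation: adaptive RewardAttacker for attacked algorithms, or an oracle attack (cancel non-target rewards) for the attacked 'stationary' variants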
if 'attacked' in alg_name:
if 'stationary' not in alg_name:
attacker.update(context, a_t, r_t)
attack_t = attacker.compute_attack(a_t, context, target_arm)
else:
if a_t != target_arm:
attack_t = -r_t + noise * np.random.randn()
else:
attack_t = 0
else:
attack_t = 0
# the learner only ever sees the perturbed reward, clipped to [0, 1]
alg.update(context, a_t, min(1, max(0, r_t + attack_t)))
attack_accumulator += np.abs(attack_t)
regret_accumulator += model.best_arm_reward(context) - np.dot(model.thetas[a_t], context)
rewards_range_max = max(rewards_range_max, min(1, max(r_t + attack_t, 0)))
draws_accumulator += 1 if a_t == target_arm else 0
if (t + 1) % frequency == 0:  # log once per window of `frequency` rounds, then reset the sums
epsilon_norm[k, t // frequency] = attack_accumulator
regret[k, t // frequency] = regret_accumulator
rewards_range[k, t // frequency] = rewards_range_max
draws[k, t // frequency] = draws_accumulator
attack_accumulator = 0
regret_accumulator = 0
rewards_range_max = 0
draws_accumulator = 0
# Diagnostics disabled here: per-round tracking of alg.thetas_hat and of the scalar products
# <thetas_hat[a], x> - (1 - eps) * <theta_target, x>; the corresponding entries of the
# returned logs are therefore left empty.
results.append((alg_name, {"regret": regret, "attack_cond": epsilon_norm, "target_draws": draws,
"thetas": (), "prod_scalar": (), "range_rewards": rewards_range}))
return m, results, model, target_arm