in research/pate_2017/analysis.py [0:0]
def main(unused_argv):
##################################################################
# If we are reproducing results from paper https://arxiv.org/abs/1610.05755,
# download the required binaries with label information.
##################################################################
# Binaries for MNIST results
paper_binaries_mnist = \
["https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_teachers_labels.npy?raw=true",
"https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_teachers_100_indices_used_by_student.npy?raw=true"]
if FLAGS.counts_file == "mnist_250_teachers_labels.npy" \
or FLAGS.indices_file == "mnist_250_teachers_100_indices_used_by_student.npy":
maybe_download(paper_binaries_mnist, os.getcwd())
# Binaries for SVHN results
paper_binaries_svhn = ["https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/svhn_250_teachers_labels.npy?raw=true"]
if FLAGS.counts_file == "svhn_250_teachers_labels.npy":
maybe_download(paper_binaries_svhn, os.getcwd())
input_mat = np.load(FLAGS.counts_file)
if FLAGS.input_is_counts:
counts_mat = input_mat
else:
# In this case, the input is the raw predictions. Transform
num_teachers, n = input_mat.shape
counts_mat = np.zeros((n, 10)).astype(np.int32)
for i in range(n):
for j in range(num_teachers):
counts_mat[i, int(input_mat[j, i])] += 1
n = counts_mat.shape[0]
num_examples = min(n, FLAGS.max_examples)
if not FLAGS.indices_file:
indices = np.array(range(num_examples))
else:
index_list = np.load(FLAGS.indices_file)
indices = index_list[:num_examples]
l_list = 1.0 + np.array(xrange(FLAGS.moments))
beta = FLAGS.beta
total_log_mgf_nm = np.array([0.0 for _ in l_list])
total_ss_nm = np.array([0.0 for _ in l_list])
noise_eps = FLAGS.noise_eps
for i in indices:
total_log_mgf_nm += np.array(
[logmgf_from_counts(counts_mat[i], noise_eps, l)
for l in l_list])
total_ss_nm += np.array(
[smoothed_sens(counts_mat[i], noise_eps, l, beta)
for l in l_list])
delta = FLAGS.delta
# We want delta = exp(alpha - eps l).
# Solving gives eps = (alpha - ln (delta))/l
eps_list_nm = (total_log_mgf_nm - math.log(delta)) / l_list
print("Epsilons (Noisy Max): " + str(eps_list_nm))
print("Smoothed sensitivities (Noisy Max): " + str(total_ss_nm / l_list))
# If beta < eps / 2 ln (1/delta), then adding noise Lap(1) * 2 SS/eps
# is eps,delta DP
# Also if beta < eps / 2(gamma +1), then adding noise 2(gamma+1) SS eta / eps
# where eta has density proportional to 1 / (1+|z|^gamma) is eps-DP
# Both from Corolloary 2.4 in
# http://www.cse.psu.edu/~ads22/pubs/NRS07/NRS07-full-draft-v1.pdf
# Print the first one's scale
ss_eps = 2.0 * beta * math.log(1/delta)
ss_scale = 2.0 / ss_eps
print("To get an " + str(ss_eps) + "-DP estimate of epsilon, ")
print("..add noise ~ " + str(ss_scale))
print("... times " + str(total_ss_nm / l_list))
print("Epsilon = " + str(min(eps_list_nm)) + ".")
if min(eps_list_nm) == eps_list_nm[-1]:
print("Warning: May not have used enough values of l")
# Data independent bound, as mechanism is
# 2*noise_eps DP.
data_ind_log_mgf = np.array([0.0 for _ in l_list])
data_ind_log_mgf += num_examples * np.array(
[logmgf_exact(1.0, 2.0 * noise_eps, l) for l in l_list])
data_ind_eps_list = (data_ind_log_mgf - math.log(delta)) / l_list
print("Data independent bound = " + str(min(data_ind_eps_list)) + ".")
return