def main()

in research/pate_2017/analysis.py [0:0]


def main(unused_argv):
  ##################################################################
  # If we are reproducing results from paper https://arxiv.org/abs/1610.05755,
  # download the required binaries with label information.
  ##################################################################

  # Binaries for MNIST results
  paper_binaries_mnist = [
      "https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_teachers_labels.npy?raw=true",
      "https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_teachers_100_indices_used_by_student.npy?raw=true",
  ]
  if (FLAGS.counts_file == "mnist_250_teachers_labels.npy" or
      FLAGS.indices_file == "mnist_250_teachers_100_indices_used_by_student.npy"):
    maybe_download(paper_binaries_mnist, os.getcwd())

  # Binaries for SVHN results
  paper_binaries_svhn = ["https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/svhn_250_teachers_labels.npy?raw=true"]
  if FLAGS.counts_file == "svhn_250_teachers_labels.npy":
    maybe_download(paper_binaries_svhn, os.getcwd())

  input_mat = np.load(FLAGS.counts_file)
  if FLAGS.input_is_counts:
    counts_mat = input_mat
  else:
    # The input is the raw matrix of teacher predictions (num_teachers x n);
    # transform it into per-example label counts.
    num_teachers, n = input_mat.shape
    counts_mat = np.zeros((n, 10)).astype(np.int32)  # 10 classes (MNIST/SVHN)
    for i in range(n):
      for j in range(num_teachers):
        counts_mat[i, int(input_mat[j, i])] += 1
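    # A vectorized equivalent of the loop above (a sketch, assuming the
    # predictions are integer labels in 0..9):
    #   counts_mat = np.apply_along_axis(
    #       lambda col: np.bincount(col.astype(np.int32), minlength=10),
    #       0, input_mat).T.astype(np.int32)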
  n = counts_mat.shape[0]
  num_examples = min(n, FLAGS.max_examples)

  if not FLAGS.indices_file:
    indices = np.array(range(num_examples))
  else:
    index_list = np.load(FLAGS.indices_file)
    indices = index_list[:num_examples]

  # l_list holds the moment orders 1, 2, ..., FLAGS.moments at which the
  # log moment generating function of the privacy loss is tracked.
  l_list = 1.0 + np.arange(FLAGS.moments)
  beta = FLAGS.beta
  total_log_mgf_nm = np.zeros(len(l_list))
  total_ss_nm = np.zeros(len(l_list))
  noise_eps = FLAGS.noise_eps

  for i in indices:
    total_log_mgf_nm += np.array(
        [logmgf_from_counts(counts_mat[i], noise_eps, l)
         for l in l_list])
    total_ss_nm += np.array(
        [smoothed_sens(counts_mat[i], noise_eps, l, beta)
         for l in l_list])
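  # Log moment generating functions compose additively across answered
  # queries (the moments accountant), so per-query contributions are summed;
  # the per-query smoothed sensitivities are accumulated the same way.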
  delta = FLAGS.delta

  # We want delta = exp(alpha - eps * l);
  # solving for eps gives eps = (alpha - ln(delta)) / l.
  eps_list_nm = (total_log_mgf_nm - math.log(delta)) / l_list
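  # Illustrative arithmetic (hypothetical values): with delta = 1e-5 and an
  # accumulated log-mgf of 10.0 at l = 20, eps = (10.0 + 11.51) / 20 ~= 1.08.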

  print("Epsilons (Noisy Max): " + str(eps_list_nm))
  print("Smoothed sensitivities (Noisy Max): " + str(total_ss_nm / l_list))

  # If beta <= eps / (2 * ln(1/delta)), then adding noise Lap(1) * (2 * SS / eps)
  # is (eps, delta)-DP.
  # Also, if beta <= eps / (2 * (gamma + 1)), then adding noise
  # (2 * (gamma + 1) * SS / eps) * eta, where eta has density proportional
  # to 1 / (1 + |z|^gamma), is eps-DP.
  # Both follow from Corollary 2.4 in
  # http://www.cse.psu.edu/~ads22/pubs/NRS07/NRS07-full-draft-v1.pdf
  # Below we print the scale for the first mechanism.
  ss_eps = 2.0 * beta * math.log(1/delta)
  ss_scale = 2.0 / ss_eps
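  # Illustrative arithmetic (hypothetical values): with beta = 0.09 and
  # delta = 1e-5, ss_eps = 2 * 0.09 * ln(1e5) ~= 2.07 and
  # ss_scale = 2 / 2.07 ~= 0.97.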
  print("To get an " + str(ss_eps) + "-DP estimate of epsilon, ")
  print("..add noise ~ " + str(ss_scale))
  print("... times " + str(total_ss_nm / l_list))
  print("Epsilon = " + str(min(eps_list_nm)) + ".")
  if min(eps_list_nm) == eps_list_nm[-1]:
    print("Warning: May not have used enough values of l")

  # Data-independent bound, since the mechanism is 2*noise_eps-DP.
  data_ind_log_mgf = np.zeros(len(l_list))
  data_ind_log_mgf += num_examples * np.array(
      [logmgf_exact(1.0, 2.0 * noise_eps, l) for l in l_list])
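  # Evaluating logmgf_exact at q = 1.0 assumes nothing about the vote counts,
  # giving a worst-case per-query log moment for a 2*noise_eps-DP mechanism;
  # multiplying by num_examples composes the bound over all answered queries.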

  data_ind_eps_list = (data_ind_log_mgf - math.log(delta)) / l_list
  print("Data independent bound = " + str(min(data_ind_eps_list)) + ".")

  return
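
# Example invocation (illustrative flag values; see the flag definitions in
# analysis.py for the actual defaults):
#   python analysis.py --counts_file=mnist_250_teachers_labels.npy \
#     --indices_file=mnist_250_teachers_100_indices_used_by_student.npy \
#     --max_examples=100 --noise_eps=0.1 --delta=1e-5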