in research/pate_2018/ICLR2018/smooth_sensitivity_table.py [0:0]
def _find_optimal_smooth_sensitivity_parameters(
    votes, baseline, num_teachers, threshold, sigma1, sigma2, delta, ind_step1,
    ind_step2, order):
  """Optimizes smooth sensitivity parameters by minimizing a cost function.

  The cost function is
        exact_eps + cost of GNSS + two stds of noise,
  which captures that upper bound of the confidence interval of the sanitized
  privacy budget.

  Since optimization is done with full view of sensitive data, the results
  cannot be released.

  A progress line is printed to stdout after every 100th query and after the
  last one.

  Args:
    votes: Sized iterable of per-query vote vectors, processed in order.
    baseline: Indexed as `baseline[i,]` and subtracted from the i-th vote
      vector for the thresholding step. Only read when `threshold` is not
      None.
    num_teachers: Length of the all-zero local sensitivity vectors; also
      forwarded to the `pate_ss` local sensitivity helpers.
    threshold: Threshold for step 1. If None, step 1 is skipped: every query
      counts as answered and contributes no RDP cost or local sensitivity.
    sigma1: Noise parameter forwarded to the step 1 (thresholding) helpers.
    sigma2: Noise parameter forwarded to the step 2 (GNMax) helpers.
    delta: Used to convert the RDP total into the reported eps via
      log(1 / delta) / (order - 1).
    ind_step1: If true, use the data-independent bound for step 1.
    ind_step2: If true, use the data-independent bound for step 2.
    order: RDP order. Also scales the range of beta values that is searched.

  Returns:
    A tuple (beta_opt, ss_opt, sigma_ss_opt) computed after the last query.
    When both `ind_step1` and `ind_step2` are set this is (0., 0., np.inf).

  Raises:
    ValueError: If `votes` is empty.
  """
  # An explicit length check: truthiness of a numpy array is ambiguous, and
  # without any query there is nothing to optimize or return.
  num_queries = len(votes)
  if num_queries == 0:
    raise ValueError('votes must contain at least one query.')

  rdp_cum = 0
  answered_cum = 0
  ls_cum = 0

  # Define a plausible range for the beta values.
  betas = np.arange(.3 / order, .495 / order, .01 / order)
  cost_delta = math.log(1 / delta) / (order - 1)

  for i, v in enumerate(votes):
    if threshold is None:
      log_pr_answered = 0
      rdp1 = 0
      ls_step1 = np.zeros(num_teachers)
    else:
      v_minus_baseline = v - baseline[i,]
      log_pr_answered = pate.compute_logpr_answered(threshold, sigma1,
                                                    v_minus_baseline)
      if ind_step1:  # apply data-independent bound for step 1 (thresholding).
        rdp1 = pate.compute_rdp_data_independent_threshold(sigma1, order)
        ls_step1 = np.zeros(num_teachers)
      else:
        rdp1 = pate.compute_rdp_threshold(log_pr_answered, sigma1, order)
        ls_step1 = pate_ss.compute_local_sensitivity_bounds_threshold(
            v_minus_baseline, num_teachers, threshold, sigma1, order)

    pr_answered = math.exp(log_pr_answered)
    answered_cum += pr_answered

    if ind_step2:  # apply data-independent bound for step 2 (GNMax).
      rdp2 = pate.rdp_data_independent_gaussian(sigma2, order)
      ls_step2 = np.zeros(num_teachers)
    else:
      logq_step2 = pate.compute_logq_gaussian(v, sigma2)
      rdp2 = pate.rdp_gaussian(logq_step2, sigma2, order)
      # Compute smooth sensitivity.
      ls_step2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
          v, num_teachers, sigma2, order)

    rdp_cum += rdp1 + pr_answered * rdp2
    ls_cum += ls_step1 + pr_answered * ls_step2  # Expected local sensitivity.

    # The optimum depends only on the running totals and is recomputed from
    # scratch each time, so it is only needed on reporting iterations. These
    # always include the last query, whose optimum is returned.
    is_last_query = i == num_queries - 1
    if (i + 1) % 100 != 0 and not is_last_query:
      continue

    if ind_step1 and ind_step2:
      # Data-independent bounds.
      beta_opt, ss_opt, sigma_ss_opt = 0., 0., np.inf
      # ss is zero here, so the deviation is zero; the product 0 * inf would
      # be reported as nan.
      noise_std = 0.
    else:
      # Data-dependent bounds.
      cost_opt, beta_opt, ss_opt, sigma_ss_opt = np.inf, None, None, None

      for beta in betas:
        ss = pate_ss.compute_discounted_max(beta, ls_cum)

        # Solution to the minimization problem:
        #   min_sigma {order * exp(2 * beta)/ sigma^2 + 2 * ss * sigma}
        sigma_ss = ((order * math.exp(2 * beta)) / ss)**(1 / 3)
        cost_ss = pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
            beta, sigma_ss, order)

        # Cost captures exact_eps + cost of releasing SS + two stds of noise.
        cost = rdp_cum + cost_ss + 2 * ss * sigma_ss
        if cost < cost_opt:
          cost_opt, beta_opt, ss_opt, sigma_ss_opt = cost, beta, ss, sigma_ss

      noise_std = ss_opt * sigma_ss_opt

    eps_before_ss = rdp_cum + cost_delta
    eps_with_ss = (
        eps_before_ss + pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
            beta_opt, sigma_ss_opt, order))
    print('{}: E[answered queries] = {:.1f}, RDP at {} goes from {:.3f} to '
          '{:.3f} +/- {:.3f} (ss = {:.4}, beta = {:.4f}, sigma_ss = {:.3f})'.
          format(i + 1, answered_cum, order, eps_before_ss, eps_with_ss,
                 noise_std, ss_opt, beta_opt, sigma_ss_opt))
    sys.stdout.flush()

  # Return optimal parameters for the last iteration.
  return beta_opt, ss_opt, sigma_ss_opt