research/pate_2018/ICLR2018/rdp_cumulative.py
import math
import sys

import numpy as np

import core as pate  # core.py provides the pate.* RDP helpers used below.


def run_analysis(votes, mechanism, noise_scale, params):
"""Computes data-dependent privacy.
Args:
    votes: A matrix of votes, where each row contains the teachers' votes for
      one query.
    mechanism: Name of the mechanism: 'lnmax', 'gnmax', or 'gnmax_conf'.
    noise_scale: The mechanism's noise parameter (the Laplace scale for
      'lnmax', or the Gaussian sigma for 'gnmax' and 'gnmax_conf').
    params: Dict of additional privacy parameters; for 'gnmax_conf' it must
      contain the selection threshold 't' and the selection noise 'sigma1'.
Returns:
Four lists: cumulative privacy cost epsilon, how privacy budget is split,
how many queries were answered, optimal order.
"""
def compute_partition(order_opt, eps):
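    """Returns the fraction of the final eps contributed by each cost component."""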
order_opt_idx = np.searchsorted(orders, order_opt)
if mechanism == 'gnmax_conf':
p = (rdp_select_cum[order_opt_idx],
rdp_cum[order_opt_idx] - rdp_select_cum[order_opt_idx],
-math.log(delta) / (order_opt - 1))
else:
p = (rdp_cum[order_opt_idx], -math.log(delta) / (order_opt - 1))
return [x / eps for x in p] # Ensures that sum(x) == 1
# Short list of orders.
# orders = np.round(np.concatenate((np.arange(2, 50 + 1, 1),
# np.logspace(np.log10(50), np.log10(1000), num=20))))
# Long list of orders.
orders = np.concatenate((np.arange(2, 100 + 1, .5),
np.logspace(np.log10(100), np.log10(500), num=100)))
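  # Fixed target delta used to convert the cumulative RDP to (eps, delta)-DP.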
delta = 1e-8
n = votes.shape[0]
eps_total = np.zeros(n)
partition = [None] * n
order_opt = np.full(n, np.nan, dtype=float)
answered = np.zeros(n, dtype=float)
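  # Running sums over queries at every order: RDP, squared RDP (for a variance
  # estimate), and the RDP of the selection step (step 1 of Confident-GNMax).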
rdp_cum = np.zeros(len(orders))
rdp_sqrd_cum = np.zeros(len(orders))
rdp_select_cum = np.zeros(len(orders))
answered_sum = 0
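  # Process the queries one at a time, accumulating RDP at every order and
  # converting to (eps, delta)-DP after each query.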
for i in range(n):
    v = votes[i, :]
if mechanism == 'lnmax':
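      # LNMax: Laplace noise; the data-dependent RDP bound is derived from the
      # pure-DP guarantee eps = 2 / noise_scale.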
logq_lnmax = pate.compute_logq_laplace(v, noise_scale)
rdp_query = pate.rdp_pure_eps(logq_lnmax, 2. / noise_scale, orders)
rdp_sqrd = rdp_query ** 2
pr_answered = 1
elif mechanism == 'gnmax':
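      # GNMax: Gaussian noise with sigma = noise_scale; logq is the
      # log-probability that the noisy argmax differs from the true plurality.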
logq_gmax = pate.compute_logq_gaussian(v, noise_scale)
rdp_query = pate.rdp_gaussian(logq_gmax, noise_scale, orders)
rdp_sqrd = rdp_query ** 2
pr_answered = 1
elif mechanism == 'gnmax_conf':
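      # Confident-GNMax: step 1 noisily checks (with noise sigma1) whether the
      # top vote count clears the threshold t; step 2 runs GNMax only on
      # queries that pass the check.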
logq_step1 = pate.compute_logpr_answered(params['t'], params['sigma1'], v)
logq_step2 = pate.compute_logq_gaussian(v, noise_scale)
q_step1 = np.exp(logq_step1)
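      # The selection step has a binary outcome, so its privacy cost is
      # governed by the smaller of Pr[answered] and Pr[not answered].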
logq_step1_min = min(logq_step1, math.log1p(-q_step1))
rdp_gnmax_step1 = pate.rdp_gaussian(logq_step1_min,
2 ** .5 * params['sigma1'], orders)
rdp_gnmax_step2 = pate.rdp_gaussian(logq_step2, noise_scale, orders)
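      # Expected RDP cost of the query: step 1 is always incurred; step 2 is
      # incurred only with probability q_step1 (the probability of answering).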
rdp_query = rdp_gnmax_step1 + q_step1 * rdp_gnmax_step2
# The expression below evaluates
# E[(cost_of_step_1 + Bernoulli(pr_of_step_2) * cost_of_step_2)^2]
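      #   = cost_1^2 + 2 * q * cost_1 * cost_2 + q * cost_2^2,
      # using Bernoulli(q)^2 = Bernoulli(q) and E[Bernoulli(q)] = q.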
rdp_sqrd = (
rdp_gnmax_step1 ** 2 + 2 * rdp_gnmax_step1 * q_step1 * rdp_gnmax_step2
+ q_step1 * rdp_gnmax_step2 ** 2)
rdp_select_cum += rdp_gnmax_step1
pr_answered = q_step1
else:
raise ValueError(
'Mechanism must be one of ["lnmax", "gnmax", "gnmax_conf"]')
rdp_cum += rdp_query
rdp_sqrd_cum += rdp_sqrd
answered_sum += pr_answered
answered[i] = answered_sum
eps_total[i], order_opt[i] = pate.compute_eps_from_delta(
orders, rdp_cum, delta)
partition[i] = compute_partition(order_opt[i], eps_total[i])
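    # Periodically report the running cost along with a rough standard
    # deviation of the cumulative RDP at the currently optimal order.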
if i > 0 and (i + 1) % 1000 == 0:
rdp_var = rdp_sqrd_cum / i - (
rdp_cum / i) ** 2 # Ignore Bessel's correction.
order_opt_idx = np.searchsorted(orders, order_opt[i])
eps_std = ((i + 1) * rdp_var[order_opt_idx]) ** .5 # Std of the sum.
print(
'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} (std = {:.5f}) '
'at order = {:.2f} (contribution from delta = {:.3f})'.format(
i + 1, answered_sum, eps_total[i], eps_std, order_opt[i],
-math.log(delta) / (order_opt[i] - 1)))
sys.stdout.flush()
return eps_total, partition, answered, order_opt
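

# A minimal usage sketch (the vote file and parameter values below are
# illustrative assumptions, not part of this module):
#
#   votes = np.load('votes.npy')  # hypothetical array, shape (num_queries, num_classes)
#   eps_total, partition, answered, order_opt = run_analysis(
#       votes, 'gnmax_conf', noise_scale=40.,
#       params={'t': 300., 'sigma1': 150.})
#   print('eps = {:.2f} at order {:.2f}, E[answered] = {:.1f}'.format(
#       eps_total[-1], order_opt[-1], answered[-1]))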