in modules.py:
import torch


def backward(ctx, grad_output):
    # unpack saved context:
    input, weight, bias = ctx.saved_tensors

    # compute gradient with respect to the input (not privatized; only the
    # parameter gradients receive clipping and noise):
    grad_input = grad_output.mm(weight)

    # compute norms of the per-example parameter gradients: for a linear
    # layer, the per-example weight gradient is the outer product of
    # grad_output[i] and input[i], whose Frobenius norm equals
    # ||grad_output[i]|| * ||input[i]||:
    gradient_norm = grad_output.pow(2.0).sum(1, keepdim=True).mul(
        input.pow(2.0).sum(1, keepdim=True)
    )
    gradient_norm = torch.sqrt(gradient_norm)

    # aggregate the clipped per-example gradients; _get_multipliers is
    # assumed to return the per-example clipping factor min(1, clip / norm):
    multiplier = _get_multipliers(gradient_norm, ctx.clip)
    grad_weight = grad_output.mul(multiplier).t().matmul(input)

    # add Gaussian noise to the aggregated weight gradient:
    grad_weight += torch.randn_like(grad_weight) * ctx.clip * ctx.std

    # perform the same procedure for the bias gradient; the per-example
    # bias gradient is grad_output[i] itself, so its norm is taken directly:
    if bias is not None:
        multiplier = _get_multipliers(grad_output.norm(2, 1), ctx.clip)
        grad_bias = grad_output.mul(multiplier.unsqueeze(1)).sum(0)
        grad_bias += torch.randn_like(grad_bias) * ctx.clip * ctx.std
    else:
        grad_bias = None

    # return private gradients; the two trailing Nones correspond to the
    # non-tensor clip and std arguments of the matching forward:
    return grad_input, grad_weight, grad_bias, None, None
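

# The helper _get_multipliers is referenced above but not defined in this
# excerpt. A minimal sketch, assuming it implements the standard DP-SGD
# clipping factor min(1, clip / norm); the actual helper in modules.py may
# differ:
def _get_multipliers(norms, clip):
    # per-example clipping factors: 1.0 where the gradient norm is already
    # within the bound, clip / norm otherwise, so that every rescaled
    # per-example gradient has norm at most `clip`:
    return torch.clamp(clip / (norms + 1e-9), max=1.0)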
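

# For context, backward as written matches a torch.autograd.Function whose
# forward saves the tensors and stashes the clip and std hyperparameters on
# ctx; the two trailing Nones in backward line up with those non-tensor
# arguments. A hypothetical wrapper (class name and signature are
# assumptions, not the actual modules.py code):
class PrivateLinearFunction(torch.autograd.Function):
    """Linear layer whose parameter gradients are clipped and noised."""

    @staticmethod
    def forward(ctx, input, weight, bias, clip, std):
        # stash everything backward needs on the context:
        ctx.save_for_backward(input, weight, bias)
        ctx.clip = clip
        ctx.std = std
        output = input.mm(weight.t())
        if bias is not None:
            output = output + bias
        return output

    # backward would be the function shown above, decorated @staticmethod.


# Example usage (apply takes positional arguments only; clip=1.0, std=0.1
# are made-up privacy parameters):
# output = PrivateLinearFunction.apply(x, weight, bias, 1.0, 0.1)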