# private_prediction.py
def subsagg_method(data, args, visualizer=None, title=None):
"""
Given a dataset `data` and arguments `args`, run a full test of the private
prediction algorithm of Dwork & Feldman (2018). Returns a `dict` containing
the `predictions` for the training and test data.
"""
    # an unspecified inference budget means we try many candidate values:
if args.inference_budget == -1:
inference_budgets = INFERENCE_BUDGETS
else:
inference_budgets = [args.inference_budget]
# split training set into disjoint subsets:
data["split_train"] = split_dataset(data["train"], args.num_models)
# train all classifiers:
logging.info(f"Training {args.num_models} disjoint classifiers...")
    models = [None] * args.num_models
    # data augmentation is only used for non-linear models:
    augmentation = (args.model != "linear")
for idx in range(args.num_models):
# initialize model:
logging.info(f" => training model {idx + 1} of {args.num_models}:")
num_classes = int(data["train"]["targets"].max()) + 1
num_features = data["split_train"][idx]["features"].size(1)
models[idx] = modeling.initialize_model(
num_features, num_classes, model=args.model, device=args.device
)
# train using L2-regularized loss:
regularized_criterion = modeling.add_l2_regularization(
nn.CrossEntropyLoss(), models[idx], args.weight_decay
)
modeling.train_model(models[idx], data["split_train"][idx],
criterion=regularized_criterion,
optimizer=args.optimizer,
num_epochs=args.num_epochs,
learning_rate=args.learning_rate,
batch_size=args.batch_size,
augmentation=augmentation,
visualizer=visualizer,
title=title)
# clean up:
del data["split_train"]
# perform inference on both training and test set:
logging.info("Performing inference with private predictor...")
predictions = {}
for split in data.keys():
# compute predictions of each model:
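        # linear models are cheap enough to score in one full batch; other models use mini-batches of 128: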
batch_size = data[split]["targets"].size(0) if args.model == "linear" else 128
preds = [modeling.test_model(
model, data[split], augmentation=augmentation, batch_size=batch_size,
) for model in models]
preds = [pred.argmax(dim=1) for pred in preds]
preds = torch.stack(preds, dim=1)
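        # `preds` now holds one hard label per model: shape (num_samples, num_models).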
# compute private predictions:
if split not in predictions:
predictions[split] = {}
for inference_budget in inference_budgets:
# privacy parameter must be corrected for inference budget:
epsilon = args.epsilon / float(inference_budget)
if args.delta > 0:
eps, _ = advanced_compose(
args.epsilon, args.delta, inference_budget, args.delta)
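                # keep whichever composition grants the larger per-query budget (i.e., less noise per query):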
epsilon = max(eps, epsilon)
# compute and store private predictions:
            predictions[split][inference_budget] = private_prediction(
                preds, epsilon=epsilon
            )
# return predictions:
return predictions
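

# For reference, a minimal sketch of the private vote aggregation that
# `private_prediction(preds, epsilon=epsilon)` is assumed to perform above:
# per sample, count the hard-label votes of the disjoint models, add Laplace
# noise to the counts, and report the argmax. The helper name
# `_noisy_vote_sketch` and the Laplace-based report-noisy-max are illustrative
# assumptions, not the repository's actual implementation; the sketch relies
# on the module-level `import torch`.
def _noisy_vote_sketch(votes, epsilon, num_classes):
    """
    `votes` is a LongTensor of shape (num_samples, num_models) with one class
    vote per model. Returns a LongTensor of private labels, shape (num_samples,).
    """
    # per-sample histogram of votes over the classes:
    counts = torch.stack(
        [torch.bincount(row, minlength=num_classes) for row in votes]
    ).float()
    # changing one training example can change at most one (disjoint) shard
    # model's vote, so two counts move by 1 each: the count vector has L1
    # sensitivity 2. Laplace(2 / epsilon) noise thus makes the noisy counts
    # epsilon-DP, and taking the argmax is post-processing:
    noise = torch.distributions.Laplace(0.0, 2.0 / epsilon).sample(counts.shape)
    return (counts + noise).argmax(dim=1)


# e.g., `_noisy_vote_sketch(preds, epsilon, num_classes)` would play the role
# of `private_prediction(preds, epsilon=epsilon)` in the inference loop above.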