# fredrikson14_inverter() — from model_inversion.py

def fredrikson14_inverter(
        train_data, target_attribute, model, weights=None, one_hot=False, **kwargs):
    """
    Implements the model inversion attack of:
        Fredrikson, 2014, Privacy in Pharmacogenetics: An End-to-End Case Study
        of Personalized Warfarin Dosing

    For every candidate value of the target attribute, scores the candidate by
    log pi(y, y') + log p(x), where pi(y, y') is a model-dependent performance
    measure and p(x) is the (weighted) empirical marginal of the attribute.

    Arguments:
        train_data: dict with "features" (n x d tensor) and "targets" (n,).
        target_attribute: (start, stop) column range of the one-hot-encoded
            target attribute within the feature matrix.
        model: a fitted models.LeastSquares or models.Logistic instance.
        weights: optional per-example weights; defaults to all ones.
        one_hot: if True the attribute is strictly one-hot, so the all-zeros
            encoding is not a valid candidate and is not scored.

    Returns:
        A (n,) tensor of argmax indices over the scored candidates; when
        one_hot is False the last index denotes the all-zeros encoding.

    Raises:
        ValueError: if `model` is neither LeastSquares nor Logistic.
    """
    if weights is None:
        weights = torch.ones(train_data["features"].size(0))

    # Prior term log p(x) for each candidate attribute value, estimated from
    # the (weighted) training data.
    log_marginal = compute_log_marginal(
        train_data, target_attribute, weights=weights, one_hot=one_hot)

    if isinstance(model, models.LeastSquares):
        # Gaussian residual model: the residual variance is estimated from the
        # training fit with n - d degrees of freedom, and the score is the
        # per-example (weighted) log-likelihood of the residuals (up to an
        # additive constant).
        n, d = train_data["features"].shape
        std_var = (weights * model.loss(train_data)).sum().true_divide(n - d)

        def score_fn(data):
            return -0.5 * (weights * model.loss(data)) / std_var
    elif isinstance(model, models.Logistic):
        # Empirical performance matrix pi(y, y') of the classifier on the
        # training set, indexed by [true label, predicted label].
        # NOTE(review): `weights` is not applied to these counts, and the
        # normalization is over axis=0 (true labels) — confirm both match the
        # intended definition of pi(y, y').
        preds = model.predict(train_data["features"])
        y = train_data["targets"]
        matched = preds == y
        confusions = torch.tensor([
            [matched[y == 0].sum(), (~matched)[y == 0].sum()],
            [(~matched)[y == 1].sum(), matched[y == 1].sum()]
        ])
        pi = confusions.true_divide(confusions.sum(axis=0, keepdim=True))

        def score_fn(data):
            preds = model.predict(data["features"])
            y = data["targets"]
            return pi[y, preds.long()].log()
    else:
        raise ValueError("Unknown model type.")

    # For each possible value of the target attribute compute score for the
    # attribute which should be proportional to log pi(y, y') + log p(x), where
    # pi(y, y') is a model dependent performance measure. Work on a detached
    # copy so the caller's features are never mutated.
    tgt_features = train_data["features"].clone().detach()
    tgt_features[:, range(*target_attribute)] = 0.

    scores = []
    for c in range(*target_attribute):
        tgt_features[:, c] = 1.
        score = score_fn(
            {"features": tgt_features, "targets": train_data["targets"]})
        score = score + log_marginal[c - target_attribute[0]]
        scores.append(score)
        tgt_features[:, c] = 0.
    # When the attribute is not strictly one-hot, the all-zeros encoding is
    # also a valid candidate; its prior sits in the last slot of log_marginal.
    if not one_hot:
        score = score_fn(
            {"features": tgt_features, "targets": train_data["targets"]})
        score = score + log_marginal[-1]
        scores.append(score)

    # Per-example argmax over the stacked candidate scores.
    return torch.argmax(torch.stack(scores, axis=1), axis=1)