def __init__()

in dataloading.py [0:0]


    def __init__(self, root, train=True, download=False, drop=True):
        """Load the data returned by ``load_iwpc`` and keep one side of a fixed split.

        The loader (``main.load_iwpc``) is imported from ``<root>/code`` and is
        pointed at ``<root>/data``. The samples are shuffled with a fixed seed
        and split 80:20; the selected part is stored as float tensors in
        ``self.data`` and ``self.targets``.

        Args:
            root: Directory holding (or receiving) the repository checkout.
            train: If True keep the first 80% of the shuffled samples,
                otherwise keep the remaining 20%.
            download: If True, clone the ``ml-privacy-csf18`` repository into
                ``root`` first. An existing non-empty ``root`` is left as is.
            drop: If True, remove one feature column (the first one listed in
                the feature names) for each of the one-hot encoded attributes
                ``cyp2c9``, ``race`` and ``vkorc1``. This helps alleviate
                perfect colinearity amongst the features.

        Raises:
            git.GitCommandError: If cloning fails for a reason other than
                ``root`` already being a non-empty directory.
            ValueError: If ``drop`` is set and one of the attributes above is
                missing from the feature names.
        """
        if download:
            # download dataloading code and data from repo
            try:
                git.Repo.clone_from("git@github.com:samuel-yeom/ml-privacy-csf18.git", root)
            except git.GitCommandError:
                # Only an already populated target is a benign failure. Any
                # other git error (network, authentication, ...) must not be
                # reported as "already downloaded".
                if not (os.path.isdir(root) and os.listdir(root)):
                    raise
                print("Directory exists and is non-empty, skipping download")
        code_dir = os.path.join(root, "code")
        if code_dir not in sys.path:
            # Avoid growing sys.path every time a dataset is constructed.
            sys.path.append(code_dir)
        from main import load_iwpc
        X, y, featnames = load_iwpc(os.path.join(root, "data"))
        # todense() yields np.matrix; convert to a plain ndarray before the
        # column deletion and the tensor conversion below.
        X = np.asarray(X.todense())
        if drop:
            # Feature names look like "<attribute>=<value>"; keep the attribute part.
            attrs = [f.split("=")[0] for f in featnames]
            drop_keys = ["cyp2c9", "race", "vkorc1"]
            # list.index returns the first match, i.e. the first one-hot
            # column of each attribute is the one that gets removed.
            drop_idx = [attrs.index(k) for k in drop_keys]
            X = np.delete(X, drop_idx, axis=1)
            featnames = np.delete(featnames, drop_idx)
        print("Attributes: " + str(featnames))
        X = torch.from_numpy(X).float()
        y = torch.from_numpy(y).float()
        # fix a random 80:20 train-val split
        # A private generator seeded with 0 keeps the split fixed without
        # touching the global torch RNG, so a seed set by the caller before
        # constructing the dataset stays in effect afterwards.
        split_rng = torch.Generator()
        split_rng.manual_seed(0)
        perm = torch.randperm(X.size(0), generator=split_rng)
        n_train = int(0.8 * X.size(0))
        idx = perm[:n_train] if train else perm[n_train:]
        self.data = X[idx, :]
        self.targets = y[idx]