def get_dataset(params)

in attacks/privacy_attacks.py
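
Loads the training split for one of the supported datasets. The image datasets ('cifar10', 'mnist', 'imagenet', 'cifar100') are returned as torchvision datasets with the appropriate transforms; the tabular datasets ('credit', 'hep', 'adult') are returned as lists of [features, label] tensor pairs.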


# Imports used below (assumed to sit at the top of attacks/privacy_attacks.py).
import numpy as np
import pandas as pd
import torch
import torchvision
from sklearn import preprocessing
from sklearn.datasets import fetch_openml
from sklearn.impute import SimpleImputer
from torchvision import transforms


def get_dataset(params):
    """
    Load the training data for a privacy attack.

    `params` must provide `dataset`, `data_root`, and `aug`; for the
    'credit' and 'hep' datasets, `params.num_classes` is set to 2 as a
    side effect.
    """
    if params.dataset == 'cifar10':
        # Scale CIFAR-10 channels to [-1, 1].
        normalize = [transforms.ToTensor(),
                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
        if params.aug:
            print('Using data augmentation')
            augmentations = [transforms.RandomCrop(32, padding=4),
                             transforms.RandomHorizontalFlip()]
            model_transform = transforms.Compose(augmentations + normalize)
        else:
            print('Not using data augmentation')
            model_transform = transforms.Compose(normalize)
        return torchvision.datasets.CIFAR10(root=params.data_root, train=True,
                                            download=True, transform=model_transform)
    
    elif params.dataset == 'mnist':
        # Standard MNIST mean/std normalization.
        transform = transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize((0.1307,), (0.3081,))])
        return torchvision.datasets.MNIST(root=params.data_root, train=True,
                                          download=True, transform=transform)

    elif params.dataset == 'imagenet':
        # Standard ImageNet mean/std normalization.
        normalize = [transforms.ToTensor(),
                     transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224, 0.225])]
        if params.aug:
            print('Using data augmentation to train model')
            augmentations = [transforms.Resize(256),
                             transforms.RandomResizedCrop(224),
                             transforms.RandomHorizontalFlip()]
            transform = transforms.Compose(augmentations + normalize)
        else:
            print('Not using data augmentation to train model')
            transform = transforms.Compose([transforms.Resize(256),
                                            transforms.CenterCrop(224)] + normalize)
        return torchvision.datasets.ImageFolder(root=params.data_root + '/train',
                                                transform=transform)
    elif params.dataset == 'cifar100':
        # Per-channel CIFAR-100 statistics, given in [0, 255] and rescaled to [0, 1].
        normalize = [transforms.ToTensor(),
                     transforms.Normalize(mean=[n / 255 for n in [129.3, 124.1, 112.4]],
                                          std=[n / 255 for n in [68.2, 65.4, 70.4]])]
        if params.aug:
            augmentations = [transforms.RandomCrop(32, padding=4),
                             transforms.RandomHorizontalFlip()]
            transform = transforms.Compose(augmentations + normalize)
        else:
            transform = transforms.Compose(normalize)
        return torchvision.datasets.CIFAR100(root=params.data_root, train=True,
                                             download=True, transform=transform)
    
    elif params.dataset == 'credit':
        cred = fetch_openml('credit-g')

        # Mean-impute missing values and integer-encode the string labels.
        data = SimpleImputer(missing_values=np.nan, strategy='mean').fit_transform(cred.data)
        target = preprocessing.LabelEncoder().fit_transform(cred.target)

        # Divide each feature by its largest absolute value (max of |min| and
        # |max| per column), mapping every feature into [-1, 1].
        norm = np.max(np.concatenate((-data.min(axis=0)[np.newaxis],
                                      data.max(axis=0)[np.newaxis]), axis=0).T,
                      axis=1).astype('float32')
        data = np.divide(data, norm)

        data = torch.tensor(data).float()
        target = torch.tensor(target).long()

        # Keep the first 800 of the 1000 records as the training set.
        ids = np.arange(800)
        final_data = [[data[i], target[i]] for i in ids]

        params.num_classes = 2
        return final_data
    elif params.dataset == 'hep':
        hep = fetch_openml('hepatitis')

        data = SimpleImputer(missing_values=np.nan, strategy='mean').fit_transform(hep.data)
        target = preprocessing.LabelEncoder().fit_transform(hep.target)

        # Same max-magnitude scaling to [-1, 1] as for 'credit'.
        norm = np.max(np.concatenate((-data.min(axis=0)[np.newaxis],
                                      data.max(axis=0)[np.newaxis]), axis=0).T,
                      axis=1).astype('float32')
        data = np.divide(data, norm)

        data = torch.tensor(data).float()
        target = torch.tensor(target).long()

        # Keep the first 124 of the 155 records as the training set.
        ids = np.arange(124)
        final_data = [[data[i], target[i]] for i in ids]

        params.num_classes = 2
        return final_data
    elif params.dataset == 'adult':
        columns = ["age", "workClass", "fnlwgt", "education", "education-num",
                   "marital-status", "occupation", "relationship", "race", "sex",
                   "capital-gain", "capital-loss", "hours-per-week",
                   "native-country", "income"]
        # The regex separator requires the python parser engine.
        train_data = pd.read_csv(params.data_root + '/adult.data', names=columns,
                                 sep=' *, *', na_values='?', engine='python')
        test_data = pd.read_csv(params.data_root + '/adult.test', names=columns,
                                sep=' *, *', skiprows=1, na_values='?', engine='python')

        # Encode train and test together so the one-hot categories stay aligned.
        num_train = len(train_data)
        original = pd.concat([train_data, test_data])
        labels = original['income']
        # The test file suffixes its labels with a period ('<=50K.', '>50K.').
        labels = labels.replace({'<=50K': 0, '>50K': 1, '<=50K.': 0, '>50K.': 1})

        # Remove the target column before encoding the features.
        del original["income"]

        # `adult_data_transform` is a helper defined elsewhere in this module.
        data = adult_data_transform(original)
        train_data = data[:num_train]
        train_labels = labels[:num_train]
        test_data = data[num_train:]
        test_labels = labels[num_train:]

        test_data = torch.tensor(test_data.to_numpy()).float()
        train_data = torch.tensor(train_data.to_numpy()).float()
        test_labels = torch.tensor(test_labels.to_numpy(dtype='int64')).long()
        train_labels = torch.tensor(train_labels.to_numpy(dtype='int64')).long()

        final_data = [[train_data[i], train_labels[i]]
                      for i in np.arange(len(train_data))]
        return final_data

    raise ValueError(f'Unknown dataset: {params.dataset}')
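
A minimal usage sketch, assuming `params` can be any object exposing the attributes the function reads (the `SimpleNamespace` here is illustrative, not part of the repo):

from types import SimpleNamespace
from torch.utils.data import DataLoader

# Hypothetical params object; the real code passes a parsed-arguments object
# with the same attribute names.
params = SimpleNamespace(dataset='cifar10', data_root='./data', aug=True)
train_set = get_dataset(params)

# Both return types work with DataLoader: torchvision datasets directly, and
# the tabular lists of [features, label] pairs via the default collate function.
loader = DataLoader(train_set, batch_size=128, shuffle=True)

Returning a plain list of [features, label] tensor pairs for the tabular datasets keeps them interchangeable with the torchvision datasets downstream, since both satisfy the map-style dataset interface DataLoader expects.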