# attacks/privacy_attacks.py

import numpy as np
import pandas as pd
import torch
import torchvision
import torchvision.transforms as transforms
from sklearn import preprocessing
from sklearn.datasets import fetch_openml
from sklearn.impute import SimpleImputer


def get_dataset(params):
    """Load the training data for privacy attacks, selected by params.dataset."""
    if params.dataset == 'cifar10':
        # Map pixels to [-1, 1] per channel.
        normalize = [transforms.ToTensor(),
                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
        if params.aug:
            print('Using data augmentation')
            augmentations = [transforms.RandomCrop(32, padding=4),
                             transforms.RandomHorizontalFlip()]
            model_transform = transforms.Compose(augmentations + normalize)
        else:
            print('Not using data augmentation')
            model_transform = transforms.Compose(normalize)
        return torchvision.datasets.CIFAR10(root=params.data_root, train=True,
                                            download=True, transform=model_transform)
    elif params.dataset == 'mnist':
        # Standard MNIST mean/std normalization.
        transform = transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize((0.1307,), (0.3081,))])
        return torchvision.datasets.MNIST(root=params.data_root, train=True,
                                          download=True, transform=transform)
    elif params.dataset == 'imagenet':
        # Standard ImageNet mean/std normalization.
        normalize = [transforms.ToTensor(),
                     transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224, 0.225])]
        if params.aug:
            print('Using data augmentation to train model')
            augmentations = [transforms.Resize(256),
                             transforms.RandomResizedCrop(224),
                             transforms.RandomHorizontalFlip()]
            transform = transforms.Compose(augmentations + normalize)
        else:
            print('Not using data augmentation to train model')
            transform = transforms.Compose([transforms.Resize(256),
                                            transforms.CenterCrop(224)] + normalize)
        return torchvision.datasets.ImageFolder(root=params.data_root + '/train',
                                                transform=transform)
    elif params.dataset == 'cifar100':
        # CIFAR-100 per-channel mean/std, given on the [0, 255] scale and rescaled to [0, 1].
        normalize = [transforms.ToTensor(),
                     transforms.Normalize(mean=[n / 255 for n in [129.3, 124.1, 112.4]],
                                          std=[n / 255 for n in [68.2, 65.4, 70.4]])]
        if params.aug:
            augmentations = [transforms.RandomCrop(32, padding=4),
                             transforms.RandomHorizontalFlip()]
            transform = transforms.Compose(augmentations + normalize)
        else:
            transform = transforms.Compose(normalize)
        return torchvision.datasets.CIFAR100(root=params.data_root, train=True,
                                             download=True, transform=transform)
    elif params.dataset == 'credit':
        # as_frame=False keeps fetch_openml's legacy ndarray return (nominal
        # features ordinally encoded), which the mean imputer below expects.
        cred = fetch_openml('credit-g', as_frame=False)
        data = SimpleImputer(missing_values=np.nan, strategy='mean').fit_transform(cred.data)
        target = preprocessing.LabelEncoder().fit_transform(cred.target)
        # Scale each feature by the larger of |min| and max so values lie in [-1, 1].
        norm = np.max(np.concatenate((-1 * data.min(axis=0)[np.newaxis],
                                      data.max(axis=0)[np.newaxis]), axis=0).T,
                      axis=1).astype('float32')
        data = torch.tensor(np.divide(data, norm)).float()
        target = torch.tensor(target).long()
        # The first 800 of the 1,000 records form the training split.
        final_data = [[data[i], target[i]] for i in np.arange(1000)[:800]]
        params.num_classes = 2
        return final_data
    elif params.dataset == 'hep':
        hep = fetch_openml('hepatitis', as_frame=False)
        data = SimpleImputer(missing_values=np.nan, strategy='mean').fit_transform(hep.data)
        target = preprocessing.LabelEncoder().fit_transform(hep.target)
        # Same max-absolute-value scaling as for the credit data.
        norm = np.max(np.concatenate((-1 * data.min(axis=0)[np.newaxis],
                                      data.max(axis=0)[np.newaxis]), axis=0).T,
                      axis=1).astype('float32')
        data = torch.tensor(np.divide(data, norm)).float()
        target = torch.tensor(target).long()
        # The first 124 of the 155 records form the training split.
        final_data = [[data[i], target[i]] for i in np.arange(155)[:124]]
        params.num_classes = 2
        return final_data
    elif params.dataset == 'adult':
        columns = ["age", "workClass", "fnlwgt", "education", "education-num",
                   "marital-status", "occupation", "relationship", "race", "sex",
                   "capital-gain", "capital-loss", "hours-per-week",
                   "native-country", "income"]
        # The regex separator strips the stray spaces around commas in the raw
        # files; it requires the python parsing engine.
        train_data = pd.read_csv(params.data_root + '/adult.data', names=columns,
                                 sep=' *, *', engine='python', na_values='?')
        test_data = pd.read_csv(params.data_root + '/adult.test', names=columns,
                                sep=' *, *', engine='python', skiprows=1, na_values='?')
        num_train = len(train_data)
        # Transform train and test together so both share one feature encoding, then re-split.
        original = pd.concat([train_data, test_data])
        labels = original['income']
        labels = labels.replace('<=50K', 0).replace('>50K', 1)
        # Labels in adult.test carry a trailing period.
        labels = labels.replace('<=50K.', 0).replace('>50K.', 1)
        # Remove the target before transforming the features.
        del original["income"]
        data = adult_data_transform(original)
        train_data = torch.tensor(data[:num_train].to_numpy()).float()
        train_labels = torch.tensor(labels[:num_train].to_numpy(dtype='int64')).long()
        # The test split is prepared for completeness but unused below.
        test_data = torch.tensor(data[num_train:].to_numpy()).float()
        test_labels = torch.tensor(labels[num_train:].to_numpy(dtype='int64')).long()
        # Only the training split is returned for the attack.
        final_data = [[train_data[i], train_labels[i]] for i in range(len(train_data))]
        return final_data
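

# `adult_data_transform` is assumed to be defined elsewhere in this file. For
# reference only, a hypothetical minimal sketch of such a transform (standardize
# numeric columns, mode-fill and one-hot encode categorical ones) might look like
# the following; the name is illustrative and is not the repo's actual helper.
def _example_adult_data_transform(df):
    df = df.copy()
    for col in df.columns:
        if df[col].dtype == object:
            # Mode-fill missing categorical values before encoding.
            df[col] = df[col].fillna(df[col].mode()[0])
        else:
            # Standardize continuous features to zero mean, unit variance.
            df[col] = (df[col] - df[col].mean()) / df[col].std()
    # One-hot encode the remaining categorical (object) columns.
    return pd.get_dummies(df)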
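

# Example usage: a minimal sketch, not part of the original module. `get_dataset`
# only needs a `params` object exposing `dataset`, `aug`, and `data_root` (plus a
# writable `num_classes` for the tabular sets); an argparse.Namespace is assumed.
if __name__ == '__main__':
    from argparse import Namespace

    params = Namespace(dataset='cifar10', aug=True, data_root='./data', num_classes=None)
    train_set = get_dataset(params)
    loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
    images, targets = next(iter(loader))
    print(images.shape, targets.shape)  # torch.Size([64, 3, 32, 32]) torch.Size([64])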