aiops/ContraAD/data_factory/data_loader.py (479 lines of code) (raw):
import torch
import os
import random
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np
import collections
import numbers
import math
import pandas as pd
from sklearn.preprocessing import StandardScaler
import pickle
def seed_worker(worker_id):
    """Seed NumPy's and Python's global RNGs from the current torch seed.

    Intended as a ``worker_init_fn`` callback for a ``DataLoader``.
    ``worker_id`` is accepted to match that callback signature and is
    otherwise unused.
    """
    # Reduce torch's seed modulo 2**32 before handing it to the other RNGs.
    seed32 = torch.initial_seed() % (2 ** 32)
    random.seed(seed32)
    np.random.seed(seed32)
# Module-level torch generator with a fixed seed of 0; ``manual_seed``
# returns the generator itself, so creation and seeding can be chained.
g = torch.Generator().manual_seed(0)
class PSMSegLoader:
    """Sliding-window view over the PSM train/test CSV files.

    ``train.csv``, ``test.csv`` and ``test_label.csv`` are read from
    ``data_path``; the first column of each is dropped. NaNs in the feature
    files are replaced via ``np.nan_to_num`` and a ``StandardScaler`` fitted
    on the training data is applied to both splits. The validation split is
    the same array as the test split.

    Modes:
        * ``"train"`` / ``"val"`` / ``"test"`` -- windows of ``win_size`` rows
          taken every ``step`` rows from the matching split.
        * anything else -- non-overlapping windows (stride ``win_size``)
          over the test split.
    """

    def __init__(self, data_path, win_size, step, mode="train"):
        """Load, clean and scale the PSM data.

        Args:
            data_path: Directory containing the three CSV files.
            win_size: Number of rows per window.
            step: Stride between consecutive window starts.
            mode: Which split / windowing scheme to serve (see class doc).
        """
        self.mode = mode
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_raw = pd.read_csv(data_path + '/train.csv')
        train_raw = np.nan_to_num(train_raw.values[:, 1:])
        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)

        test_raw = pd.read_csv(data_path + '/test.csv')
        test_raw = np.nan_to_num(test_raw.values[:, 1:])
        self.test = self.scaler.transform(test_raw)

        self.val = self.test
        self.test_labels = pd.read_csv(data_path + '/test_label.csv').values[:, 1:]

    def _active_series(self):
        """Return the array that windows are cut from in the current mode."""
        if self.mode == "train":
            return self.train
        if self.mode == "val":
            return self.val
        return self.test

    def __len__(self):
        """Return the number of windows available in the current mode.

        Returns 0 (rather than a negative number, which ``len()`` rejects)
        when the series is shorter than a single window.
        """
        if self.mode in ("train", "val", "test"):
            stride = self.step
        else:
            stride = self.win_size
        count = (self._active_series().shape[0] - self.win_size) // stride + 1
        return max(count, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays for window ``index``.

        For ``"train"`` and ``"val"`` the label part is always the first
        ``win_size`` test labels -- a placeholder, presumably ignored by
        callers (TODO confirm). For ``"test"`` and the fallback mode the
        labels are aligned with the returned window.
        """
        width = self.win_size
        start = index * self.step
        if self.mode == "train":
            return np.float32(self.train[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "val":
            return np.float32(self.val[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "test":
            return (np.float32(self.test[start:start + width]),
                    np.float32(self.test_labels[start:start + width]))
        # Fallback mode: convert back to the window number and use
        # non-overlapping windows of the test split.
        start = start // self.step * width
        return (np.float32(self.test[start:start + width]),
                np.float32(self.test_labels[start:start + width]))
class MSLSegLoader:
    """Sliding-window view over the MSL ``.npy`` train/test arrays.

    A ``StandardScaler`` fitted on ``MSL_train.npy`` is applied to both the
    train and test arrays. The validation split is the same array as the
    test split.

    Modes:
        * ``"train"`` / ``"val"`` / ``"test"`` -- windows of ``win_size`` rows
          taken every ``step`` rows from the matching split.
        * anything else -- non-overlapping windows (stride ``win_size``)
          over the test split.
    """

    def __init__(self, data_path, win_size, step, mode="train"):
        """Load and scale the MSL arrays found in ``data_path``.

        Args:
            data_path: Directory containing the ``MSL_*.npy`` files.
            win_size: Number of rows per window.
            step: Stride between consecutive window starts.
            mode: Which split / windowing scheme to serve (see class doc).
        """
        self.mode = mode
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_raw = np.load(data_path + "/MSL_train.npy")
        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)

        test_raw = np.load(data_path + "/MSL_test.npy")
        self.test = self.scaler.transform(test_raw)

        self.val = self.test
        self.test_labels = np.load(data_path + "/MSL_test_label.npy")

    def _active_series(self):
        """Return the array that windows are cut from in the current mode."""
        if self.mode == "train":
            return self.train
        if self.mode == "val":
            return self.val
        return self.test

    def __len__(self):
        """Return the number of windows available in the current mode.

        Returns 0 (rather than a negative number, which ``len()`` rejects)
        when the series is shorter than a single window.
        """
        if self.mode in ("train", "val", "test"):
            stride = self.step
        else:
            stride = self.win_size
        count = (self._active_series().shape[0] - self.win_size) // stride + 1
        return max(count, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays for window ``index``.

        For ``"train"`` and ``"val"`` the label part is always the first
        ``win_size`` test labels -- a placeholder, presumably ignored by
        callers (TODO confirm). For ``"test"`` and the fallback mode the
        labels are aligned with the returned window.
        """
        width = self.win_size
        start = index * self.step
        if self.mode == "train":
            return np.float32(self.train[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "val":
            return np.float32(self.val[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "test":
            return (np.float32(self.test[start:start + width]),
                    np.float32(self.test_labels[start:start + width]))
        # Fallback mode: convert back to the window number and use
        # non-overlapping windows of the test split.
        start = start // self.step * width
        return (np.float32(self.test[start:start + width]),
                np.float32(self.test_labels[start:start + width]))
class SMAPSegLoader:
    """Sliding-window view over the SMAP ``.npy`` train/test arrays.

    A ``StandardScaler`` fitted on ``SMAP_train.npy`` is applied to both the
    train and test arrays. The validation split is the same array as the
    test split.

    Modes:
        * ``"train"`` / ``"val"`` / ``"test"`` -- windows of ``win_size`` rows
          taken every ``step`` rows from the matching split.
        * anything else -- non-overlapping windows (stride ``win_size``)
          over the test split.
    """

    def __init__(self, data_path, win_size, step, mode="train"):
        """Load and scale the SMAP arrays found in ``data_path``.

        Args:
            data_path: Directory containing the ``SMAP_*.npy`` files.
            win_size: Number of rows per window.
            step: Stride between consecutive window starts.
            mode: Which split / windowing scheme to serve (see class doc).
        """
        self.mode = mode
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_raw = np.load(data_path + "/SMAP_train.npy")
        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)

        test_raw = np.load(data_path + "/SMAP_test.npy")
        self.test = self.scaler.transform(test_raw)

        self.val = self.test
        self.test_labels = np.load(data_path + "/SMAP_test_label.npy")

    def _active_series(self):
        """Return the array that windows are cut from in the current mode."""
        if self.mode == "train":
            return self.train
        if self.mode == "val":
            return self.val
        return self.test

    def __len__(self):
        """Return the number of windows available in the current mode.

        Returns 0 (rather than a negative number, which ``len()`` rejects)
        when the series is shorter than a single window.
        """
        if self.mode in ("train", "val", "test"):
            stride = self.step
        else:
            stride = self.win_size
        count = (self._active_series().shape[0] - self.win_size) // stride + 1
        return max(count, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays for window ``index``.

        Train and val do not use the label: for those modes the label part
        is always the first ``win_size`` test labels, as a placeholder.
        For ``"test"`` and the fallback mode the labels are aligned with
        the returned window.
        """
        width = self.win_size
        start = index * self.step
        if self.mode == "train":
            return np.float32(self.train[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "val":
            return np.float32(self.val[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "test":
            return (np.float32(self.test[start:start + width]),
                    np.float32(self.test_labels[start:start + width]))
        # Fallback mode: convert back to the window number and use
        # non-overlapping windows of the test split.
        start = start // self.step * width
        return (np.float32(self.test[start:start + width]),
                np.float32(self.test_labels[start:start + width]))
class SMDSegLoader:
    """Sliding-window view over the SMD ``.npy`` train/test arrays.

    A ``StandardScaler`` fitted on ``SMD_train.npy`` is applied to both the
    train and test arrays. The validation split is the last 20% of the
    scaled training array (it overlaps the tail of ``train``).

    Modes:
        * ``"train"`` / ``"val"`` / ``"test"`` -- windows of ``win_size`` rows
          taken every ``step`` rows from the matching split.
        * anything else -- non-overlapping windows (stride ``win_size``)
          over the test split.
    """

    def __init__(self, data_path, win_size, step, mode="train"):
        """Load and scale the SMD arrays found in ``data_path``.

        Args:
            data_path: Directory containing the ``SMD_*.npy`` files.
            win_size: Number of rows per window.
            step: Stride between consecutive window starts.
            mode: Which split / windowing scheme to serve (see class doc).
        """
        self.mode = mode
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        # The [:, :] indexing is kept from the original; it requires the
        # stored arrays to be two-dimensional.
        train_raw = np.load(data_path + "/SMD_train.npy")[:, :]
        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)

        test_raw = np.load(data_path + "/SMD_test.npy")[:, :]
        self.test = self.scaler.transform(test_raw)

        # Validation data: the final 20% of the training rows.
        val_start = int(len(self.train) * 0.8)
        self.val = self.train[val_start:]
        self.test_labels = np.load(data_path + "/SMD_test_label.npy")[:]

    def _active_series(self):
        """Return the array that windows are cut from in the current mode."""
        if self.mode == "train":
            return self.train
        if self.mode == "val":
            return self.val
        return self.test

    def __len__(self):
        """Return the number of windows available in the current mode.

        Returns 0 (rather than a negative number, which ``len()`` rejects)
        when the series is shorter than a single window.
        """
        if self.mode in ("train", "val", "test"):
            stride = self.step
        else:
            stride = self.win_size
        count = (self._active_series().shape[0] - self.win_size) // stride + 1
        return max(count, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays for window ``index``.

        For ``"train"`` and ``"val"`` the label part is always the first
        ``win_size`` test labels -- a placeholder, presumably ignored by
        callers (TODO confirm). For ``"test"`` and the fallback mode the
        labels are aligned with the returned window.
        """
        width = self.win_size
        start = index * self.step
        if self.mode == "train":
            return np.float32(self.train[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "val":
            return np.float32(self.val[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "test":
            return (np.float32(self.test[start:start + width]),
                    np.float32(self.test_labels[start:start + width]))
        # Fallback mode: convert back to the window number and use
        # non-overlapping windows of the test split.
        start = start // self.step * width
        return (np.float32(self.test[start:start + width]),
                np.float32(self.test_labels[start:start + width]))
class UCRSegLoader:
    """Sliding-window view over one UCR sub-dataset stored as ``.npy`` files.

    The sub-dataset is selected by ``index``, which is interpolated into the
    file names (``UCR_<index>_train.npy`` etc.). A ``StandardScaler`` fitted
    on the training array is applied to both splits. The validation split is
    the same array as the test split.

    Modes:
        * ``"train"`` / ``"val"`` / ``"test"`` -- windows of ``win_size`` rows
          taken every ``step`` rows from the matching split.
        * anything else -- non-overlapping windows (stride ``win_size``)
          over the test split.
    """

    def __init__(self, index, data_path, win_size, step, mode="train"):
        """Load and scale the UCR arrays for sub-dataset ``index``.

        Args:
            index: Identifier of the UCR sub-dataset (used in file names).
            data_path: Directory containing the ``UCR_*`` files.
            win_size: Number of rows per window.
            step: Stride between consecutive window starts.
            mode: Which split / windowing scheme to serve (see class doc).
        """
        self.mode = mode
        self.step = step
        self.index = index
        self.win_size = win_size
        self.scaler = StandardScaler()

        prefix = data_path + "/UCR_" + str(index)
        train_raw = np.load(prefix + "_train.npy")
        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)

        test_raw = np.load(prefix + "_test.npy")
        self.test = self.scaler.transform(test_raw)

        self.val = self.test
        self.test_labels = np.load(prefix + "_test_label.npy")
        # Shapes are reported only when the loader is built in "val" mode.
        if self.mode == "val":
            print("train:", self.train.shape)
            print("test:", self.test.shape)

    def _active_series(self):
        """Return the array that windows are cut from in the current mode."""
        if self.mode == "train":
            return self.train
        if self.mode == "val":
            return self.val
        return self.test

    def __len__(self):
        """Return the number of windows available in the current mode.

        Returns 0 (rather than a negative number, which ``len()`` rejects)
        when the series is shorter than a single window.
        """
        if self.mode in ("train", "val", "test"):
            stride = self.step
        else:
            stride = self.win_size
        count = (self._active_series().shape[0] - self.win_size) // stride + 1
        return max(count, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays for window ``index``.

        For ``"train"`` and ``"val"`` the label part is always the first
        ``win_size`` test labels -- a placeholder, presumably ignored by
        callers (TODO confirm). For ``"test"`` and the fallback mode the
        labels are aligned with the returned window.
        """
        width = self.win_size
        start = index * self.step
        if self.mode == "train":
            return np.float32(self.train[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "val":
            return np.float32(self.val[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "test":
            return (np.float32(self.test[start:start + width]),
                    np.float32(self.test_labels[start:start + width]))
        # Fallback mode: convert back to the window number and use
        # non-overlapping windows of the test split.
        start = start // self.step * width
        return (np.float32(self.test[start:start + width]),
                np.float32(self.test_labels[start:start + width]))
class UCRAUGSegLoader:
    """Sliding-window view over one UCR_AUG sub-dataset stored as ``.npy``.

    The sub-dataset is selected by ``index``, which is interpolated into the
    file names (``UCR_AUG_<index>_train.npy`` etc.). A ``StandardScaler``
    fitted on the training array is applied to both splits. The validation
    split is the same array as the test split.

    Modes:
        * ``"train"`` / ``"val"`` / ``"test"`` -- windows of ``win_size`` rows
          taken every ``step`` rows from the matching split.
        * anything else -- non-overlapping windows (stride ``win_size``)
          over the test split.
    """

    def __init__(self, index, data_path, win_size, step, mode="train"):
        """Load and scale the UCR_AUG arrays for sub-dataset ``index``.

        Args:
            index: Identifier of the sub-dataset (used in file names).
            data_path: Directory containing the ``UCR_AUG_*`` files.
            win_size: Number of rows per window.
            step: Stride between consecutive window starts.
            mode: Which split / windowing scheme to serve (see class doc).
        """
        self.mode = mode
        self.step = step
        self.index = index
        self.win_size = win_size
        self.scaler = StandardScaler()

        prefix = data_path + "/UCR_AUG_" + str(index)
        train_raw = np.load(prefix + "_train.npy")
        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)

        test_raw = np.load(prefix + "_test.npy")
        self.test = self.scaler.transform(test_raw)

        self.val = self.test
        self.test_labels = np.load(prefix + "_test_label.npy")
        # Shapes are reported only when the loader is built in "val" mode.
        if self.mode == "val":
            print("train:", self.train.shape)
            print("test:", self.test.shape)

    def _active_series(self):
        """Return the array that windows are cut from in the current mode."""
        if self.mode == "train":
            return self.train
        if self.mode == "val":
            return self.val
        return self.test

    def __len__(self):
        """Return the number of windows available in the current mode.

        Returns 0 (rather than a negative number, which ``len()`` rejects)
        when the series is shorter than a single window.
        """
        if self.mode in ("train", "val", "test"):
            stride = self.step
        else:
            stride = self.win_size
        count = (self._active_series().shape[0] - self.win_size) // stride + 1
        return max(count, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays for window ``index``.

        For ``"train"`` and ``"val"`` the label part is always the first
        ``win_size`` test labels -- a placeholder, presumably ignored by
        callers (TODO confirm). For ``"test"`` and the fallback mode the
        labels are aligned with the returned window.
        """
        width = self.win_size
        start = index * self.step
        if self.mode == "train":
            return np.float32(self.train[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "val":
            return np.float32(self.val[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "test":
            return (np.float32(self.test[start:start + width]),
                    np.float32(self.test_labels[start:start + width]))
        # Fallback mode: convert back to the window number and use
        # non-overlapping windows of the test split.
        start = start // self.step * width
        return (np.float32(self.test[start:start + width]),
                np.float32(self.test_labels[start:start + width]))
class NIPS_TS_WaterSegLoader:
    """Sliding-window view over the NIPS_TS_Water ``.npy`` arrays.

    A ``StandardScaler`` fitted on ``NIPS_TS_Water_train.npy`` is applied to
    both the train and test arrays. The validation split is the same array
    as the test split. The shapes of both splits are printed on construction.

    Modes:
        * ``"train"`` / ``"val"`` / ``"test"`` -- windows of ``win_size`` rows
          taken every ``step`` rows from the matching split.
        * anything else -- non-overlapping windows (stride ``win_size``)
          over the test split.
    """

    def __init__(self, data_path, win_size, step, mode="train"):
        """Load and scale the NIPS_TS_Water arrays found in ``data_path``.

        Args:
            data_path: Directory containing the ``NIPS_TS_Water_*.npy`` files.
            win_size: Number of rows per window.
            step: Stride between consecutive window starts.
            mode: Which split / windowing scheme to serve (see class doc).
        """
        self.mode = mode
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_raw = np.load(data_path + "/NIPS_TS_Water_train.npy")
        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)

        test_raw = np.load(data_path + "/NIPS_TS_Water_test.npy")
        self.test = self.scaler.transform(test_raw)

        self.val = self.test
        self.test_labels = np.load(data_path + "/NIPS_TS_Water_test_label.npy")
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def _active_series(self):
        """Return the array that windows are cut from in the current mode."""
        if self.mode == "train":
            return self.train
        if self.mode == "val":
            return self.val
        return self.test

    def __len__(self):
        """Return the number of windows available in the current mode.

        Returns 0 (rather than a negative number, which ``len()`` rejects)
        when the series is shorter than a single window.
        """
        if self.mode in ("train", "val", "test"):
            stride = self.step
        else:
            stride = self.win_size
        count = (self._active_series().shape[0] - self.win_size) // stride + 1
        return max(count, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays for window ``index``.

        For ``"train"`` and ``"val"`` the label part is always the first
        ``win_size`` test labels -- a placeholder, presumably ignored by
        callers (TODO confirm). For ``"test"`` and the fallback mode the
        labels are aligned with the returned window.
        """
        width = self.win_size
        start = index * self.step
        if self.mode == "train":
            return np.float32(self.train[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "val":
            return np.float32(self.val[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "test":
            return (np.float32(self.test[start:start + width]),
                    np.float32(self.test_labels[start:start + width]))
        # Fallback mode: convert back to the window number and use
        # non-overlapping windows of the test split.
        start = start // self.step * width
        return (np.float32(self.test[start:start + width]),
                np.float32(self.test_labels[start:start + width]))
class NIPS_TS_SwanSegLoader:
    """Sliding-window view over the NIPS_TS_Swan ``.npy`` arrays.

    A ``StandardScaler`` fitted on ``NIPS_TS_Swan_train.npy`` is applied to
    both the train and test arrays. The validation split is the same array
    as the test split. The shapes of both splits are printed on construction.

    Modes:
        * ``"train"`` / ``"val"`` / ``"test"`` -- windows of ``win_size`` rows
          taken every ``step`` rows from the matching split.
        * anything else -- non-overlapping windows (stride ``win_size``)
          over the test split.
    """

    def __init__(self, data_path, win_size, step, mode="train"):
        """Load and scale the NIPS_TS_Swan arrays found in ``data_path``.

        Args:
            data_path: Directory containing the ``NIPS_TS_Swan_*.npy`` files.
            win_size: Number of rows per window.
            step: Stride between consecutive window starts.
            mode: Which split / windowing scheme to serve (see class doc).
        """
        self.mode = mode
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_raw = np.load(data_path + "/NIPS_TS_Swan_train.npy")
        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)

        test_raw = np.load(data_path + "/NIPS_TS_Swan_test.npy")
        self.test = self.scaler.transform(test_raw)

        self.val = self.test
        self.test_labels = np.load(data_path + "/NIPS_TS_Swan_test_label.npy")
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def _active_series(self):
        """Return the array that windows are cut from in the current mode."""
        if self.mode == "train":
            return self.train
        if self.mode == "val":
            return self.val
        return self.test

    def __len__(self):
        """Return the number of windows available in the current mode.

        Returns 0 (rather than a negative number, which ``len()`` rejects)
        when the series is shorter than a single window.
        """
        if self.mode in ("train", "val", "test"):
            stride = self.step
        else:
            stride = self.win_size
        count = (self._active_series().shape[0] - self.win_size) // stride + 1
        return max(count, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays for window ``index``.

        For ``"train"`` and ``"val"`` the label part is always the first
        ``win_size`` test labels -- a placeholder, presumably ignored by
        callers (TODO confirm). For ``"test"`` and the fallback mode the
        labels are aligned with the returned window.
        """
        width = self.win_size
        start = index * self.step
        if self.mode == "train":
            return np.float32(self.train[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "val":
            return np.float32(self.val[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "test":
            return (np.float32(self.test[start:start + width]),
                    np.float32(self.test_labels[start:start + width]))
        # Fallback mode: convert back to the window number and use
        # non-overlapping windows of the test split.
        start = start // self.step * width
        return (np.float32(self.test[start:start + width]),
                np.float32(self.test_labels[start:start + width]))
class NIPS_TS_CCardSegLoader:
    """Sliding-window view over the NIPS_TS_CCard ``.npy`` arrays.

    A ``StandardScaler`` fitted on ``NIPS_TS_CCard_train.npy`` is applied to
    both the train and test arrays. The validation split is the same array
    as the test split.

    Modes:
        * ``"train"`` / ``"val"`` / ``"test"`` -- windows of ``win_size`` rows
          taken every ``step`` rows from the matching split.
        * anything else -- non-overlapping windows (stride ``win_size``)
          over the test split.
    """

    def __init__(self, data_path, win_size, step, mode="train"):
        """Load and scale the NIPS_TS_CCard arrays found in ``data_path``.

        Args:
            data_path: Directory containing the ``NIPS_TS_CCard_*.npy`` files.
            win_size: Number of rows per window.
            step: Stride between consecutive window starts.
            mode: Which split / windowing scheme to serve (see class doc).
        """
        self.mode = mode
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_raw = np.load(data_path + "/NIPS_TS_CCard_train.npy")
        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)

        test_raw = np.load(data_path + "/NIPS_TS_CCard_test.npy")
        self.test = self.scaler.transform(test_raw)

        self.val = self.test
        self.test_labels = np.load(data_path + "/NIPS_TS_CCard_test_label.npy")

    def _active_series(self):
        """Return the array that windows are cut from in the current mode."""
        if self.mode == "train":
            return self.train
        if self.mode == "val":
            return self.val
        return self.test

    def __len__(self):
        """Return the number of windows available in the current mode.

        Returns 0 (rather than a negative number, which ``len()`` rejects)
        when the series is shorter than a single window.
        """
        if self.mode in ("train", "val", "test"):
            stride = self.step
        else:
            stride = self.win_size
        count = (self._active_series().shape[0] - self.win_size) // stride + 1
        return max(count, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays for window ``index``.

        For ``"train"`` and ``"val"`` the label part is always the first
        ``win_size`` test labels -- a placeholder, presumably ignored by
        callers (TODO confirm). For ``"test"`` and the fallback mode the
        labels are aligned with the returned window.
        """
        width = self.win_size
        start = index * self.step
        if self.mode == "train":
            return np.float32(self.train[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "val":
            return np.float32(self.val[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "test":
            return (np.float32(self.test[start:start + width]),
                    np.float32(self.test_labels[start:start + width]))
        # Fallback mode: convert back to the window number and use
        # non-overlapping windows of the test split.
        start = start // self.step * width
        return (np.float32(self.test[start:start + width]),
                np.float32(self.test_labels[start:start + width]))
class SMD_OriSegLoader:
    """Sliding-window view over one SMD_Ori sub-dataset stored as ``.npy``.

    The sub-dataset is selected by ``index``, which is interpolated into the
    file names (``SMD_Ori_<index>_train.npy`` etc.). A ``StandardScaler``
    fitted on the training array is applied to both splits. The validation
    split is the same array as the test split.

    Modes:
        * ``"train"`` / ``"val"`` / ``"test"`` -- windows of ``win_size`` rows
          taken every ``step`` rows from the matching split.
        * anything else -- non-overlapping windows (stride ``win_size``)
          over the test split.
    """

    def __init__(self, index, data_path, win_size, step, mode="train"):
        """Load and scale the SMD_Ori arrays for sub-dataset ``index``.

        Args:
            index: Identifier of the sub-dataset (used in file names).
            data_path: Directory containing the ``SMD_Ori_*`` files.
            win_size: Number of rows per window.
            step: Stride between consecutive window starts.
            mode: Which split / windowing scheme to serve (see class doc).
        """
        self.mode = mode
        self.step = step
        self.index = index
        self.win_size = win_size
        self.scaler = StandardScaler()

        prefix = data_path + "/SMD_Ori_" + str(index)
        train_raw = np.load(prefix + "_train.npy")
        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)

        test_raw = np.load(prefix + "_test.npy")
        self.test = self.scaler.transform(test_raw)

        self.val = self.test
        self.test_labels = np.load(prefix + "_test_label.npy")
        # Shapes are reported only when the loader is built in "val" mode.
        if self.mode == "val":
            print("train:", self.train.shape)
            print("test:", self.test.shape)

    def _active_series(self):
        """Return the array that windows are cut from in the current mode."""
        if self.mode == "train":
            return self.train
        if self.mode == "val":
            return self.val
        return self.test

    def __len__(self):
        """Return the number of windows available in the current mode.

        Returns 0 (rather than a negative number, which ``len()`` rejects)
        when the series is shorter than a single window.
        """
        if self.mode in ("train", "val", "test"):
            stride = self.step
        else:
            stride = self.win_size
        count = (self._active_series().shape[0] - self.win_size) // stride + 1
        return max(count, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays for window ``index``.

        For ``"train"`` and ``"val"`` the label part is always the first
        ``win_size`` test labels -- a placeholder, presumably ignored by
        callers (TODO confirm). For ``"test"`` and the fallback mode the
        labels are aligned with the returned window.
        """
        width = self.win_size
        start = index * self.step
        if self.mode == "train":
            return np.float32(self.train[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "val":
            return np.float32(self.val[start:start + width]), np.float32(self.test_labels[0:width])
        if self.mode == "test":
            return (np.float32(self.test[start:start + width]),
                    np.float32(self.test_labels[start:start + width]))
        # Fallback mode: convert back to the window number and use
        # non-overlapping windows of the test split.
        start = start // self.step * width
        return (np.float32(self.test[start:start + width]),
                np.float32(self.test_labels[start:start + width]))
class SWATSegLoader(Dataset):
    """Sliding-window ``Dataset`` over the SWaT train/test CSV files.

    ``swat_train2.csv`` and ``swat2.csv`` are read from ``root_path``. The
    last column of each file is split off; for the test file it is kept as
    the labels. A ``StandardScaler`` fitted on the training features is
    applied to both splits. The validation split is the last 20% of the
    scaled training rows. The shapes of both splits are printed on
    construction.

    The split is selected by ``flag``:
        * ``"train"`` / ``"val"`` / ``"test"`` -- windows of ``win_size`` rows
          taken every ``step`` rows from the matching split.
        * anything else -- non-overlapping windows (stride ``win_size``)
          over the test split.
    """

    def __init__(self, root_path, win_size, step=1, flag="train"):
        """Load and scale the SWaT data.

        Args:
            root_path: Directory containing the two CSV files.
            win_size: Number of rows per window.
            step: Stride between consecutive window starts.
            flag: Which split / windowing scheme to serve (see class doc).
        """
        self.flag = flag
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_frame = pd.read_csv(os.path.join(root_path, 'swat_train2.csv'))
        test_frame = pd.read_csv(os.path.join(root_path, 'swat2.csv'))
        # Last column of the test file holds the labels; drop it from both
        # feature matrices.
        labels = test_frame.values[:, -1:]
        train_raw = train_frame.values[:, :-1]
        test_raw = test_frame.values[:, :-1]

        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)
        self.test = self.scaler.transform(test_raw)

        # Validation data: the final 20% of the training rows.
        val_start = int(len(self.train) * 0.8)
        self.val = self.train[val_start:]
        self.test_labels = labels
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def _active_series(self):
        """Return the array that windows are cut from for the current flag."""
        if self.flag == "train":
            return self.train
        if self.flag == "val":
            return self.val
        return self.test

    def __len__(self):
        """Return the number of windows available for the current flag.

        Returns 0 (rather than a negative number, which ``len()`` rejects)
        when the series is shorter than a single window.
        """
        if self.flag in ("train", "val", "test"):
            stride = self.step
        else:
            stride = self.win_size
        count = (self._active_series().shape[0] - self.win_size) // stride + 1
        return max(count, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays for window ``index``.

        For ``"train"`` and ``"val"`` the label part is always the first
        ``win_size`` test labels -- a placeholder, presumably ignored by
        callers (TODO confirm). For ``"test"`` and the fallback flag the
        labels are aligned with the returned window.
        """
        width = self.win_size
        start = index * self.step
        if self.flag == "train":
            return np.float32(self.train[start:start + width]), np.float32(self.test_labels[0:width])
        if self.flag == "val":
            return np.float32(self.val[start:start + width]), np.float32(self.test_labels[0:width])
        if self.flag == "test":
            return (np.float32(self.test[start:start + width]),
                    np.float32(self.test_labels[start:start + width]))
        # Fallback flag: convert back to the window number and use
        # non-overlapping windows of the test split.
        start = start // self.step * width
        return (np.float32(self.test[start:start + width]),
                np.float32(self.test_labels[start:start + width]))
def get_loader_segment(index, data_path, batch_size, win_size=100, step=100, mode='train', dataset='KDD'):
    """Build a ``DataLoader`` over sliding windows of the named dataset.

    Args:
        index: Sub-dataset identifier; only forwarded to the ``UCR``,
            ``UCR_AUG`` and ``SMD_Ori`` loaders.
        data_path: Directory holding the dataset files.
        batch_size: Batch size passed to the ``DataLoader``.
        win_size: Number of rows per window.
        step: Accepted for interface compatibility but currently ignored --
            see the note in the body.
        mode: Forwarded to the dataset; batches are shuffled only when it
            is ``'train'``.
        dataset: One of ``'SMD'``, ``'MSL'``, ``'SMAP'``, ``'PSM'``,
            ``'SWAT'``, ``'UCR'``, ``'UCR_AUG'``, ``'NIPS_TS_Water'``,
            ``'NIPS_TS_Swan'``, ``'NIPS_TS_CCard'`` or ``'SMD_Ori'``.

    Returns:
        A ``DataLoader`` with 8 workers, seeded via ``seed_worker`` and the
        module-level generator ``g``.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names
            (this includes the default ``'KDD'``). Previously the bare
            string was handed to ``DataLoader`` as if it were a dataset.
    """
    # NOTE(review): the caller-supplied ``step`` is overridden here, so every
    # dataset is built with a stride of 1. Kept as-is because callers may
    # rely on it -- confirm whether this is intentional.
    step = 1

    if dataset == 'SMD':
        segments = SMDSegLoader(data_path, win_size, step, mode)
    elif dataset == 'MSL':
        segments = MSLSegLoader(data_path, win_size, step, mode)
    elif dataset == 'SMAP':
        segments = SMAPSegLoader(data_path, win_size, step, mode)
    elif dataset == 'PSM':
        segments = PSMSegLoader(data_path, win_size, step, mode)
    elif dataset == 'SWAT':
        segments = SWATSegLoader(data_path, win_size, step, mode)
    elif dataset == 'UCR':
        segments = UCRSegLoader(index, data_path, win_size, step, mode)
    elif dataset == 'UCR_AUG':
        segments = UCRAUGSegLoader(index, data_path, win_size, step, mode)
    elif dataset == 'NIPS_TS_Water':
        segments = NIPS_TS_WaterSegLoader(data_path, win_size, step, mode)
    elif dataset == 'NIPS_TS_Swan':
        segments = NIPS_TS_SwanSegLoader(data_path, win_size, step, mode)
    elif dataset == 'NIPS_TS_CCard':
        segments = NIPS_TS_CCardSegLoader(data_path, win_size, step, mode)
    elif dataset == 'SMD_Ori':
        segments = SMD_OriSegLoader(index, data_path, win_size, step, mode)
    else:
        raise ValueError("Unsupported dataset: {!r}".format(dataset))

    data_loader = DataLoader(dataset=segments,
                             batch_size=batch_size,
                             shuffle=(mode == 'train'),
                             num_workers=8,
                             drop_last=False,
                             worker_init_fn=seed_worker,
                             generator=g,
                             )
    return data_loader