aiops/ContraAD/data_factory/data_loader.py (479 lines of code) (raw):

import collections
import math
import numbers
import os
import pickle
import random

import numpy as np
import pandas as pd
import torch
from PIL import Image
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader
from torch.utils.data import Dataset


def seed_worker(worker_id):
    """Seed NumPy and ``random`` inside a DataLoader worker process.

    The seed is taken from ``torch.initial_seed()`` reduced modulo 2**32.
    ``worker_id`` is accepted because DataLoader passes it to
    ``worker_init_fn``; it is not used.
    """
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)


# Module-level generator with a fixed seed; handed to every DataLoader built
# by get_loader_segment.
g = torch.Generator()
g.manual_seed(0)


class _SegLoaderBase(Dataset):
    """Sliding-window access over standardized train/val/test arrays.

    Subclasses only load their raw arrays; windowing is shared here.  The
    active split is read from the attribute named by ``_split_attr``:

    * ``"train"`` / ``"val"``: windows over ``self.train`` / ``self.val``
      advanced by ``self.step``; the returned label is always the first
      ``win_size`` entries of ``self.test_labels`` (a placeholder, not
      aligned with the window).
    * ``"test"``: windows over ``self.test`` advanced by ``self.step`` with
      the matching slice of ``self.test_labels``.
    * anything else: non-overlapping windows over ``self.test`` (stride
      equal to ``win_size``) with the matching label slice.
    """

    # Attribute that stores the active split name (SWATSegLoader uses "flag").
    _split_attr = "mode"

    def _configure(self, win_size, step, split):
        """Store the windowing parameters and create an unfitted scaler."""
        setattr(self, self._split_attr, split)
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

    def _fit_splits(self, train_raw, test_raw, test_labels,
                    val_from_train=False):
        """Fit the scaler on the training data and populate all splits.

        When ``val_from_train`` is true the validation split is the last 20%
        of the scaled training data; otherwise it is the scaled test data.
        """
        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)
        self.test = self.scaler.transform(test_raw)
        if val_from_train:
            self.val = self.train[int(len(self.train) * 0.8):]
        else:
            self.val = self.test
        self.test_labels = test_labels

    def _load_npy_splits(self, data_path, prefix, val_from_train=False):
        """Load ``<prefix>_{train,test,test_label}.npy`` from ``data_path``."""
        self._fit_splits(
            np.load(data_path + "/" + prefix + "_train.npy"),
            np.load(data_path + "/" + prefix + "_test.npy"),
            np.load(data_path + "/" + prefix + "_test_label.npy"),
            val_from_train=val_from_train,
        )

    def _layout(self):
        """Return ``(series, stride, labels_aligned)`` for the active split."""
        split = getattr(self, self._split_attr)
        if split == "train":
            return self.train, self.step, False
        if split == "val":
            return self.val, self.step, False
        if split == "test":
            return self.test, self.step, True
        # Any other split name yields non-overlapping windows over the test set.
        return self.test, self.win_size, True

    def __len__(self):
        """Number of windows available in the active split (never negative)."""
        series, stride, _ = self._layout()
        # Clamp: a split shorter than win_size has no windows, and len() must
        # not be given a negative value.
        return max((series.shape[0] - self.win_size) // stride + 1, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays for window ``index``.

        Raises:
            IndexError: if ``index`` is outside ``[-len(self), len(self))``.
                This also lets plain iteration over the dataset terminate.
        """
        length = len(self)
        if index < 0:
            index += length
        if not 0 <= index < length:
            raise IndexError("window index out of range")

        series, stride, labels_aligned = self._layout()
        start = index * stride
        stop = start + self.win_size
        if labels_aligned:
            labels = self.test_labels[start:stop]
        else:
            labels = self.test_labels[0:self.win_size]
        return np.float32(series[start:stop]), np.float32(labels)


class PSMSegLoader(_SegLoaderBase):
    """PSM dataset read from ``train.csv``, ``test.csv``, ``test_label.csv``.

    The first CSV column is dropped and NaNs in the features are replaced via
    ``np.nan_to_num`` before scaling.  Validation reuses the test split.
    """

    def __init__(self, data_path, win_size, step, mode="train"):
        self._configure(win_size, step, mode)
        train_raw = np.nan_to_num(
            pd.read_csv(data_path + '/train.csv').values[:, 1:])
        test_raw = np.nan_to_num(
            pd.read_csv(data_path + '/test.csv').values[:, 1:])
        labels = pd.read_csv(data_path + '/test_label.csv').values[:, 1:]
        self._fit_splits(train_raw, test_raw, labels)


class MSLSegLoader(_SegLoaderBase):
    """MSL dataset from ``MSL_*.npy`` files; validation reuses the test split."""

    def __init__(self, data_path, win_size, step, mode="train"):
        self._configure(win_size, step, mode)
        self._load_npy_splits(data_path, "MSL")


class SMAPSegLoader(_SegLoaderBase):
    """SMAP dataset from ``SMAP_*.npy`` files; validation reuses the test split."""

    def __init__(self, data_path, win_size, step, mode="train"):
        self._configure(win_size, step, mode)
        self._load_npy_splits(data_path, "SMAP")


class SMDSegLoader(_SegLoaderBase):
    """SMD dataset from ``SMD_*.npy`` files.

    Validation is the last 20% of the scaled training data.
    """

    def __init__(self, data_path, win_size, step, mode="train"):
        self._configure(win_size, step, mode)
        self._load_npy_splits(data_path, "SMD", val_from_train=True)


class UCRSegLoader(_SegLoaderBase):
    """UCR sub-dataset ``index`` from ``UCR_<index>_*.npy`` files.

    Validation reuses the test split.  Split shapes are printed when
    ``mode == "val"``.
    """

    def __init__(self, index, data_path, win_size, step, mode="train"):
        self.index = index
        self._configure(win_size, step, mode)
        self._load_npy_splits(data_path, "UCR_" + str(index))
        if self.mode == "val":
            print("train:", self.train.shape)
            print("test:", self.test.shape)


class UCRAUGSegLoader(_SegLoaderBase):
    """UCR_AUG sub-dataset ``index`` from ``UCR_AUG_<index>_*.npy`` files.

    Validation reuses the test split.  Split shapes are printed when
    ``mode == "val"``.
    """

    def __init__(self, index, data_path, win_size, step, mode="train"):
        self.index = index
        self._configure(win_size, step, mode)
        self._load_npy_splits(data_path, "UCR_AUG_" + str(index))
        if self.mode == "val":
            print("train:", self.train.shape)
            print("test:", self.test.shape)


class NIPS_TS_WaterSegLoader(_SegLoaderBase):
    """NIPS_TS_Water dataset from ``NIPS_TS_Water_*.npy`` files.

    Validation reuses the test split.  Split shapes are always printed.
    """

    def __init__(self, data_path, win_size, step, mode="train"):
        self._configure(win_size, step, mode)
        self._load_npy_splits(data_path, "NIPS_TS_Water")
        print("test:", self.test.shape)
        print("train:", self.train.shape)


class NIPS_TS_SwanSegLoader(_SegLoaderBase):
    """NIPS_TS_Swan dataset from ``NIPS_TS_Swan_*.npy`` files.

    Validation reuses the test split.  Split shapes are always printed.
    """

    def __init__(self, data_path, win_size, step, mode="train"):
        self._configure(win_size, step, mode)
        self._load_npy_splits(data_path, "NIPS_TS_Swan")
        print("test:", self.test.shape)
        print("train:", self.train.shape)


class NIPS_TS_CCardSegLoader(_SegLoaderBase):
    """NIPS_TS_CCard dataset from ``NIPS_TS_CCard_*.npy`` files.

    Validation reuses the test split.
    """

    def __init__(self, data_path, win_size, step, mode="train"):
        self._configure(win_size, step, mode)
        self._load_npy_splits(data_path, "NIPS_TS_CCard")


class SMD_OriSegLoader(_SegLoaderBase):
    """SMD_Ori sub-dataset ``index`` from ``SMD_Ori_<index>_*.npy`` files.

    Validation reuses the test split.  Split shapes are printed when
    ``mode == "val"``.
    """

    def __init__(self, index, data_path, win_size, step, mode="train"):
        self.index = index
        self._configure(win_size, step, mode)
        self._load_npy_splits(data_path, "SMD_Ori_" + str(index))
        if self.mode == "val":
            print("train:", self.train.shape)
            print("test:", self.test.shape)


class SWATSegLoader(_SegLoaderBase):
    """SWaT dataset read from ``swat_train2.csv`` and ``swat2.csv``.

    The last CSV column is split off: for the test file it becomes the
    labels, for the training file it is discarded.  Validation is the last
    20% of the scaled training data.  The active split is stored in
    ``self.flag`` (not ``self.mode``).  Split shapes are always printed.
    """

    _split_attr = "flag"

    def __init__(self, root_path, win_size, step=1, flag="train"):
        self._configure(win_size, step, flag)
        train_frame = pd.read_csv(os.path.join(root_path, 'swat_train2.csv'))
        test_frame = pd.read_csv(os.path.join(root_path, 'swat2.csv'))
        labels = test_frame.values[:, -1:]
        self._fit_splits(
            train_frame.values[:, :-1],
            test_frame.values[:, :-1],
            labels,
            val_from_train=True,
        )
        print("test:", self.test.shape)
        print("train:", self.train.shape)


# Dataset name -> (loader class, whether its constructor takes ``index`` first).
_SEG_LOADERS = {
    'SMD': (SMDSegLoader, False),
    'MSL': (MSLSegLoader, False),
    'SMAP': (SMAPSegLoader, False),
    'PSM': (PSMSegLoader, False),
    'SWAT': (SWATSegLoader, False),
    'UCR': (UCRSegLoader, True),
    'UCR_AUG': (UCRAUGSegLoader, True),
    'NIPS_TS_Water': (NIPS_TS_WaterSegLoader, False),
    'NIPS_TS_Swan': (NIPS_TS_SwanSegLoader, False),
    'NIPS_TS_CCard': (NIPS_TS_CCardSegLoader, False),
    'SMD_Ori': (SMD_OriSegLoader, True),
}


def get_loader_segment(index, data_path, batch_size, win_size=100, step=100,
                       mode='train', dataset='KDD', num_workers=8):
    """Build a DataLoader over sliding windows of the named dataset.

    Args:
        index: Sub-dataset identifier; only used by 'UCR', 'UCR_AUG' and
            'SMD_Ori'.
        data_path: Directory containing the dataset files.
        batch_size: Batch size passed to the DataLoader.
        win_size: Window length in time steps.
        step: Accepted for interface compatibility but currently ignored
            (see the note in the body).
        mode: Split name forwarded to the dataset; batches are shuffled only
            when it is 'train'.
        dataset: One of the keys of ``_SEG_LOADERS``.
        num_workers: Number of DataLoader worker processes.

    Returns:
        A ``torch.utils.data.DataLoader`` seeded through ``seed_worker`` and
        the module-level generator ``g``.

    Raises:
        ValueError: if ``dataset`` is not a supported name.  Note that the
            default 'KDD' has no loader in this module.
    """
    # NOTE(review): the stride is unconditionally forced to 1, so the ``step``
    # argument has no effect.  Preserved as-is because existing results depend
    # on it -- confirm whether this is intentional.
    step = 1

    try:
        loader_cls, takes_index = _SEG_LOADERS[dataset]
    except KeyError:
        raise ValueError(
            "Unknown dataset %r; expected one of %s"
            % (dataset, sorted(_SEG_LOADERS))
        ) from None

    if takes_index:
        segments = loader_cls(index, data_path, win_size, step, mode)
    else:
        segments = loader_cls(data_path, win_size, step, mode)

    return DataLoader(
        dataset=segments,
        batch_size=batch_size,
        shuffle=(mode == 'train'),
        num_workers=num_workers,
        drop_last=False,
        worker_init_fn=seed_worker,
        generator=g,
    )