aiops/AnomalyDetection/utils/utils.py (294 lines of code) (raw):

import pandas as pd import numpy as np import matplotlib.pyplot as plt import math import random def readMutshangData(datapath,labelpath): datas=np.load(datapath)[:14000] labels=np.load(labelpath)[:14000,0] print("label shape",labels.shape) datas=datas.reshape(35,400,-1) labels=labels.reshape(35,400) np.save("mutshangSplitData.npy",datas) np.save("mutshangSplitLabel.npy",labels) return datas,labels def readPublicData(path,path2=""): datas = np.genfromtxt(path, delimiter=',') datas/=datas.sum(axis=-1,keepdims=True) datalen,fealen=datas.shape datas=datas.reshape(2,int(datalen/2),fealen) print(datas.shape) return datas def listds(start,end): dss=[] pres=start count=start%100 while pres<=end: dss.append(pres) if count<24: count+=1 pres+=1 else: count=0 pres=pres//100 day=pres%100 if day+1<=31:#cross day pres+=1 pres*=100 else: pres=pres//100 month=pres%100 if month+1<=12:#cross month pres+=1 pres*=100 pres+=1 pres*=100 else:#cross year pres=pres//100 pres+=1#year+1 pres*=100 pres+=1#month=1 pres*=100 pres+=1#day=1 pres*=100#time=0 return dss def completeData(datas,rangeSeq,dss): ndatas=np.zeros((len(dss),len(rangeSeq))) for i,time in enumerate(dss): for j,ranges in enumerate(rangeSeq): result = datas[:, 5] == time result = datas[np.where(result)] result2 = result[:, 3] == ranges result2 = result[np.where(result2)] if len(result2)==0: ndatas[i][j]=0. else: ndatas[i][j]=result2[0,2] return ndatas def readData(path1,path2=""): datas = pd.read_csv(path1) datas = datas.sort_values('ds') datas = datas.groupby('cluster') rangeSeq = ['[0-10)', '[10-20)', '[20-30)', '[30-40)' , '[40-70)', '[70,110)', '[110,150)', '[150,190)' , '[190,230)', '[230-280)', '[280-330)', '[330-380)', '[380-430)','[430-*)'] dss = listds(2023121600, 2023122710) groups = [] for data in datas: completed = completeData(data[1].to_numpy(), rangeSeq, dss) groups.append(completed) print(groups[-1].shape) groups = np.stack(groups) if path2!="": labels=np.load(path2) else: labels=np.zeros((groups.shape[0],groups.shape[1])) return groups,labels def readData2(path1,path2=""): datas = pd.read_csv(path1) datas = datas.sort_values('ds') datas = datas.groupby('cluster') rangeSeq = ['[0-10)', '[10-20)', '[20-30)', '[30-40)' , '[40-70)', '[70,110)', '[110,150)', '[150,190)' , '[190,230)', '[230-280)', '[280-330)', '[330-380)', '[380-430)','[430-*)'] dss = listds(2023121600, 2024010210) groups = [] for data in datas: completed = completeData(data[1].to_numpy(), rangeSeq, dss) groups.append(completed) print(groups[-1].shape) groups = np.stack(groups) if path2!="": labels=np.load(path2) else: labels=np.zeros((groups.shape[0],groups.shape[1])) return groups,labels def insertNormality(datas,quickRatio,quickNumRatio,quickTimeRange,labels,exeTime,edges): exeTime=exeTime[1:] edges=edges[:-1] #exeTime = np.array([5, 15, 25, 35, 55, 90, 130, 170, 210, 255, 305, 355, 405]) #edges = [0,10, 20, 30, 40, 70, 110, 150, 190, 230, 280, 330, 380] for i, data in enumerate(datas): length, interval = data.shape changeNum = math.ceil(length * quickRatio) positions = [random.randint(0, length - 1) for i in range(changeNum)] labels[i, positions] = -1. for position in positions: quickNum = np.random.random(interval - 1) quickNum[0] = 0 quickNum /= quickNum.sum() quickNum *= quickNumRatio quickTime = np.random.randint(quickTimeRange[0], quickTimeRange[1], size=interval - 1) presTime = np.maximum(exeTime - quickTime,0) slowDur = random.randint(2, 5) for k in range(min(slowDur, length - position)): labels[i, position + k] = -1. for j in range(interval - 1): transfer = min(datas[i, position + k, j], quickNum[j]) datas[i, position + k, j] = max(datas[i, position + k, j] - quickNum[j], 0) target = len(edges)-1 for edge in reversed(edges): if presTime[j] >= edge: break target -= 1 datas[i, position + k, target] += transfer return datas,labels def insertAnomaly(datas,anomalyRatio,slowNumRatio,slowTimeRange,labels,exeTime,edges): #labels=np.zeros((datas.shape[0],datas.shape[1])) exeTime=exeTime[:-1] edges=edges[1:] #exeTime=np.array([5,15,25,35,55,90,130,170,210,255,305,355,405]) #edges=[10,20,30,40,70,110,150,190,230,280,330,380,430] for i,data in enumerate(datas): length,interval=data.shape anomalyNum=math.ceil(length*anomalyRatio) positions=[random.randint(0,length-1) for i in range(anomalyNum)] for position in positions: slowNum=np.random.random(interval-1) slowNum[-1]=0 slowNum/=slowNum.sum() slowNum*=slowNumRatio slowTime=np.random.randint(slowTimeRange[0],slowTimeRange[1],size=interval-1) presTime=exeTime+slowTime slowDur = random.randint(2, 5) for k in range(min(slowDur,length-position)): flag = True for j in range(interval - 1): transfer=min(datas[i,position+k,j],slowNum[j]) datas[i,position + k, j] = max(datas[i,position + k, j] - slowNum[j], 0) target = 0 if transfer>0: flag=False for edge in edges: if presTime[j] < edge: break target += 1 datas[i,position + k, target] += transfer if not flag: labels[i,position+k] = 1. return datas,labels def insertAnomaly2(datas,anomalyRatio,slowNumRatio,slowTimeRange,exeTime,edges): labels=np.zeros(datas.shape[0]) exeTime = exeTime[:-1] edges=edges[1:] #exeTime=np.array([5,15,25,35,55,90,130,170,210,255,305,355,405]) #edges=[10,20,30,40,70,110,150,190,230,280,330,380,430] length,interval=datas.shape anomalyNum=math.ceil(length*anomalyRatio) positions=[random.randint(0,length-1) for i in range(anomalyNum)] labels[positions]=1. for position in positions: slowNum=np.random.random(interval-1) slowNum[-1]=0 slowNum/=slowNum.sum() slowNum*=slowNumRatio slowTime=np.random.randint(slowTimeRange[0],slowTimeRange[1],size=interval-1) presTime=exeTime+slowTime slowDur=random.randint(2,8) for k in range(min(slowDur,length-position)): labels[position+k]=1. for j in range(interval-1): datas[position+k,j]=max(datas[position+k,j]-slowNum[j],0) target=0 for edge in edges: if presTime[j]<edge: break target+=1 datas[position+k,target]+=slowNum[j] return datas,labels def normaliseData(datas,omit=True): if omit: ndata=datas[:,1:] else: ndata=datas[:,:] ndata=np.array(ndata,dtype=np.float64) ndataSum=ndata.sum(axis=-1)+0.0001 ndata=ndata.transpose(1,0) ndata/=ndataSum ndata=ndata.transpose(1,0) return ndata def generateSin(start,end,num,amplitude,frequency,phase,biase): time = np.linspace(start, end, num) sin_wave = amplitude * np.sin(2 * np.pi * frequency * time + phase)+biase return sin_wave def generateSin2(start,end,num,amplitude,frequencys,phase,biase): num*=10 sin_waves=[] step = 100 time = np.linspace(start, end, num) for i in range(0,num,step): frequency=np.random.uniform(frequencys[1],frequencys[2]) ntime=np.linspace(time[i],time[i+step-1],math.ceil(step*frequencys[0]/frequency)) sin_wave = amplitude * np.sin(2 * np.pi * frequencys[0] * ntime + phase)+biase sin_waves.append(sin_wave) sin_waves=np.concatenate(sin_waves,axis=0) return sin_waves def syntheticData2(clusterNum,length,fealen,noise=False): datas = [] componsitionNum=2 for i in range(clusterNum): waves = [] for j in range(fealen): frequency = random.randint(6, 10) # 1/24+random.gauss(0,0.005)#random.randint(6,10) amplitude = random.random() * 10 bias = random.randint(1, 10) phase = random.gauss(0, 1) * 2 * math.pi wave = generateSin(0, length, length, amplitude, random.uniform(1/100,1/200), phase, bias) + amplitude for k in range(componsitionNum): wave+=generateSin(0,length,length,amplitude*0.2,1/24,phase*random.random(),bias) # frequency=1/40+random.gauss(0,0.005)#random.randint(6,10) # wave2=generateSin(0,length,length,amplitude,frequency,phase,bias)+amplitude # wave=wave1+wave2 if noise: wave += np.random.normal(0, amplitude * 0.03, length) waves.append(wave) datas.append(waves) datas = np.array(datas) # batch,fealen,winlen dataSum = datas.sum(axis=-2) # batch,winlen datas = datas.transpose((1, 0, 2)) # fealen,batch,winlen datas /= dataSum datas = datas.transpose((1, 2, 0)) labels = np.zeros((datas.shape[0], datas.shape[1])) return datas, labels def syntheticData(clusterNum,length,fealen,noise=False,std=0.06): datas=[] for i in range(clusterNum): waves=[] for j in range(fealen): frequency=random.randint(6,10)#1/24+random.gauss(0,0.005)#random.randint(6,10) amplitude=random.random()*10 bias=random.randint(1,10) phase=random.gauss(0,1)*2*math.pi wave=generateSin(0,length,length,amplitude,frequency,phase,bias)+amplitude #frequency=1/40+random.gauss(0,0.005)#random.randint(6,10) #wave2=generateSin(0,length,length,amplitude,frequency,phase,bias)+amplitude #wave=wave1+wave2 if noise: wave+=np.random.normal(0,amplitude*std,length) waves.append(wave) datas.append(waves) datas=np.array(datas)#batch,fealen,winlen dataSum = datas.sum(axis=-2)#batch,winlen datas = datas.transpose((1, 0, 2)) # fealen,batch,winlen datas/=dataSum datas=datas.transpose((1,2,0)) labels=np.zeros((datas.shape[0],datas.shape[1])) return datas,labels def syntheticData3(clusterNum,length,fealen,noise=False,frequencyRatio=0.3): datas=[] for i in range(clusterNum): waves=[] for j in range(fealen): frequency=random.uniform(0.04,0.1)#1/24+random.gauss(0,0.005)#random.randint(6,10) frequencys=[frequency,frequency,frequency*(1+frequencyRatio)] amplitude=random.random()*10 bias=random.randint(1,10) phase=random.gauss(0,1)*2*math.pi wave=generateSin2(0,length,length,amplitude,frequencys,phase,bias)+amplitude wave=wave[:length] frequency = random.uniform(0.08, 0.1) # 1/24+random.gauss(0,0.005)#random.randint(6,10) frequencys = [frequency, frequency, frequency * (1 + frequencyRatio)] amplitude = random.random() * 10 wave+=(generateSin2(0,length,length,amplitude,frequencys,phase,bias)+amplitude)[:length] frequency = random.uniform(0.01, 0.04) # 1/24+random.gauss(0,0.005)#random.randint(6,10) frequencys = [frequency, frequency, frequency * (1 + frequencyRatio)] amplitude = random.random() * 10 wave+=(generateSin2(0,length,length,amplitude,frequencys,phase,bias)+amplitude)[:length] #wave=generateSin(0,length,length,amplitude,frequency,phase,bias)+amplitude #frequency=1/40+random.gauss(0,0.005)#random.randint(6,10) #wave2=generateSin(0,length,length,amplitude,frequency,phase,bias)+amplitude #wave=wave1+wave2 if noise: wave+=np.random.normal(0,amplitude*0.06,length) waves.append(wave) datas.append(waves) datas=np.array(datas)#batch,fealen,winlen dataSum = datas.sum(axis=-2)#batch,winlen datas = datas.transpose((1, 0, 2)) # fealen,batch,winlen datas/=dataSum datas=datas.transpose((1,2,0)) labels=np.zeros((datas.shape[0],datas.shape[1])) return datas,labels