in eval/PER_src/simplePhonemLearner.py [0:0]
def loadSeqs(self):
    """Load all sequences listed in ``self.seqNames`` and index their labels.

    Each entry of ``self.seqNames`` is unpacked as a pair whose second
    element is a path relative to ``self.pathDB``. The files are read in
    parallel with the module-level ``load`` function, which is expected to
    return ``(seqName, seq)`` pairs where ``seq`` exposes ``size(1)``
    (presumably a ``(channels, samples)`` tensor -- confirm against ``load``).

    On return the following attributes are (re)built:

    * ``self.data``: all sequences concatenated along dimension 1.
    * ``self.seqOffset``: cumulative start offsets into ``self.data``
      (length ``n_sequences + 1``, first entry 0).
    * ``self.phoneLabels``: ``torch.long`` tensor of all labels, concatenated
      in the same (name-sorted) order as the sequences.
    * ``self.phoneOffsets``: cumulative start offsets into
      ``self.phoneLabels`` (length ``n_sequences + 1``, first entry 0).
    * ``self.maxSize`` / ``self.maxSizePhone``: longest sequence and longest
      label list encountered.

    Raises:
        ValueError: if ``self.seqNames`` is empty.
        KeyError: if a loaded sequence name is missing from
            ``self.phoneLabelsDict``.
    """
    # Labels
    self.seqOffset = [0]
    self.phoneLabels = []
    self.phoneOffsets = [0]
    self.data = []
    self.maxSize = 0
    self.maxSizePhone = 0

    # Data
    if len(self.seqNames) == 0:
        # Pool(0) would raise an obscure ValueError; fail with a clear one.
        raise ValueError("loadSeqs: self.seqNames is empty, nothing to load")

    nprocess = min(30, len(self.seqNames))
    start_time = time.time()
    to_load = [Path(self.pathDB) / x for _, x in self.seqNames]

    with Pool(nprocess) as p:
        poolData = p.map(load, to_load)

    # Sort on the sequence name only: comparing whole (name, tensor) tuples
    # would fall through to tensor comparison when two names are equal.
    poolData.sort(key=lambda item: item[0])

    tmpData = []
    totSize = 0
    minSizePhone = None
    for seqName, seq in poolData:
        phones = self.phoneLabelsDict[seqName]
        nPhones = len(phones)

        self.phoneLabels += phones
        self.phoneOffsets.append(len(self.phoneLabels))
        self.maxSizePhone = max(self.maxSizePhone, nPhones)
        if minSizePhone is None or nPhones < minSizePhone:
            minSizePhone = nPhones

        sizeSeq = seq.size(1)
        self.maxSize = max(self.maxSize, sizeSeq)
        totSize += sizeSeq
        tmpData.append(seq)
        self.seqOffset.append(self.seqOffset[-1] + sizeSeq)

    self.data = torch.cat(tmpData, dim=1)
    self.phoneLabels = torch.tensor(self.phoneLabels, dtype=torch.long)

    # phoneOffsets carries a leading 0, so count the loaded items directly.
    print(f'Loaded {len(poolData)} sequences '
          f'in {time.time() - start_time:.2f} seconds')
    print(f'maxSizeSeq : {self.maxSize}')
    print(f'maxSizePhone : {self.maxSizePhone}')
    print(f"minSizePhone : {minSizePhone}")
    # NOTE(review): 16000 looks like a 16 kHz sample rate -- confirm.
    print(f'Total size dataset {totSize / (16000 * 3600)} hours')