in data_preparation/make_vad_inputs.py [0:0]
def findAllSeqs(dirName,
                extension='.flac',
                loadCache=False):
    r"""
    Lists all the sequences with the given extension in the dirName directory.

    Args:
        dirName: root directory to scan recursively.
        extension: only files whose name ends with this suffix are kept.
        loadCache: if True, first try to return the listing stored in
            dirName/_seqs_cache.txt; on any failure to read it, the
            directory is scanned again.

    Output:
        outSequences, outSpeakers

        outSequences
            A list of tuples (speaker, seq_path) where:
                - speaker is the index of the speaker in outSpeakers
                - seq_path is the path of the sequence relative to dirName

        outSpeakers
            The speaker labels, ordered so that outSpeakers[speaker] is the
            label of speaker index `speaker`. Indices are assigned in the
            order speakers are first met during the directory walk.

    The speaker label is the first path component below dirName:

    \dirName
        \speaker_label
            \..
                ...
                seqName.extension

    Files located directly in dirName get the empty string as label.
    After a scan the result is written back to the cache file; a failure to
    write it is reported but does not prevent the result from being returned.
    """
    # Function-scope import: only needed to recognise a corrupt cache file.
    import pickle

    cache_path = os.path.join(dirName, '_seqs_cache.txt')
    if loadCache:
        try:
            # NOTE(review): torch.load unpickles the file, so the cache must
            # come from a trusted location.
            outSequences, speakers = torch.load(cache_path)
        except (OSError, EOFError, RuntimeError, TypeError, ValueError,
                pickle.UnpicklingError) as err:
            # A missing, truncated, corrupt or wrongly shaped cache must not
            # abort the call: fall through and rebuild the listing.
            print(f'Ran in an error while loading {cache_path}: {err}')
            print('Could not load cache, rebuilding')
        else:
            print(f'Loaded from cache {cache_path} successfully')
            return outSequences, speakers

    # Joining with '' appends a trailing separator only when it is missing and,
    # unlike indexing dirName[-1], does not fail on an empty string.
    dirName = os.path.join(dirName, '')
    prefixSize = len(dirName)

    speakersTarget = {}
    outSequences = []
    for root, _, filenames in tqdm.tqdm(os.walk(dirName)):
        filtered_files = [f for f in filenames if f.endswith(extension)]
        if not filtered_files:
            continue
        # Path of the current directory relative to dirName; its first
        # component is the speaker label.
        relative_root = root[prefixSize:]
        speakerStr = relative_root.split(os.sep)[0]
        # New speakers get the next free index (evaluated before insertion).
        speaker = speakersTarget.setdefault(speakerStr, len(speakersTarget))
        outSequences.extend((speaker, os.path.join(relative_root, filename))
                            for filename in filtered_files)

    # Invert the label -> index mapping into a list indexed by speaker id.
    outSpeakers = sorted(speakersTarget, key=speakersTarget.get)

    try:
        torch.save((outSequences, outSpeakers), cache_path)
        print(f'Saved cache file at {cache_path}')
    except (OSError, RuntimeError) as err:
        # Caching is best effort: torch reports an unwritable target as
        # RuntimeError in addition to OSError, and neither should discard
        # the listing that was just built.
        print(f'Ran in an error while saving {cache_path}: {err}')
    return outSequences, outSpeakers