in data_preparation/metadata_completion/utilities.py [0:0]
def get_speaker_data(path_dir, list_metadata, pathWav):
    """Sum up, per reader, the amount of audio found on disk.

    For every entry of ``list_metadata`` the speaker JSON located in
    ``path_dir`` is loaded and the matching wav directory under ``pathWav``
    is scanned. Entries whose wav directory does not exist, and individual
    wav files that do not exist, are skipped silently.

    Args:
        path_dir: directory holding the speaker data JSON files.
        list_metadata: sequence of metadata file names to process.
        pathWav: root directory containing one wav sub-directory per entry.

    Returns:
        A ``(speakerTalk, multiples)`` tuple where ``speakerTalk`` maps each
        reader id to its accumulated amount of audio and ``multiples`` is
        the accumulated amount of audio for files that list more than one
        reader. Amounts are ``length / rate / 3600`` as reported by
        ``torchaudio.info`` (presumably hours -- assumes ``length / rate``
        is a duration in seconds; confirm against the torchaudio version
        in use).
    """
    talk_by_reader = {}
    shared_total = 0

    progress = progressbar.ProgressBar(maxval=len(list_metadata))
    progress.start()

    for position, metadata_name in enumerate(list_metadata):
        progress.update(position)

        # The wav directory name is the zip name with its suffix swapped.
        wav_dir_name = get_zip_name(metadata_name).replace("64kb_mp3.zip",
                                                           "wav")
        speaker_json = os.path.join(path_dir,
                                    get_speaker_data_name(metadata_name))
        speaker_data = getJSON(speaker_json)

        wav_dir = os.path.join(pathWav, wav_dir_name)
        if not os.path.isdir(wav_dir):
            continue

        # "names" and "readers" are read as parallel lists: the readers of
        # the i-th name are stored at index i.
        for index, base_name in enumerate(speaker_data["names"]):
            wav_path = os.path.join(wav_dir, f'{base_name}.wav')
            if not os.path.isfile(wav_path):
                continue

            signal_info = torchaudio.info(wav_path)[0]
            amount = (signal_info.length / signal_info.rate) / 3600

            readers = speaker_data['readers'][index]
            # Files without reader information are filed under 'null'.
            readers = ['null'] if readers is None else readers

            if len(readers) > 1:
                shared_total += amount

            # The full amount is credited to every listed reader.
            for reader_id in readers:
                talk_by_reader[reader_id] = \
                    talk_by_reader.get(reader_id, 0) + amount

    progress.finish()
    return talk_by_reader, shared_total