def get_speaker_data()

in data_preparation/metadata_completion/utilities.py [0:0]


def get_speaker_data(path_dir, list_metadata, pathWav):
    """Accumulate the amount of audio attributed to each speaker.

    For every entry of ``list_metadata`` the matching speaker JSON file
    (located in ``path_dir``) is loaded, and each wav file it names is looked
    up in the corresponding sub-directory of ``pathWav``.  The size of each
    existing file, computed as ``length / rate / 3600``, is added to the
    total of every reader listed for that file.
    NOTE(review): this presumably yields hours (samples / Hz / 3600) --
    confirm against the torchaudio version in use.

    Args:
        path_dir: directory containing the speaker data JSON files.
        list_metadata: metadata file names to process.
        pathWav: root directory holding one wav sub-directory per entry.

    Returns:
        A ``(speakerTalk, multiples)`` tuple where ``speakerTalk`` maps each
        speaker ID to its accumulated size and ``multiples`` is the
        accumulated size of the files having more than one reader.
    """
    talk_per_speaker = {}
    shared_total = 0

    progress = progressbar.ProgressBar(maxval=len(list_metadata))
    progress.start()

    for position, metadata_name in enumerate(list_metadata):
        progress.update(position)

        wav_dir_name = get_zip_name(metadata_name).replace("64kb_mp3.zip",
                                                           "wav")
        # The JSON is loaded before checking that the wav directory exists.
        json_path = os.path.join(path_dir,
                                 get_speaker_data_name(metadata_name))
        speaker_info = getJSON(json_path)

        wav_dir = os.path.join(pathWav, wav_dir_name)
        if not os.path.isdir(wav_dir):
            # Nothing available on disk for this entry.
            continue

        for file_index, base_name in enumerate(speaker_info["names"]):
            wav_path = os.path.join(wav_dir, f'{base_name}.wav')
            if not os.path.isfile(wav_path):
                continue

            signal_info = torchaudio.info(wav_path)[0]
            duration = (signal_info.length / signal_info.rate) / 3600

            # Readers are looked up at the same position as the file name.
            readers = speaker_info['readers'][file_index]
            if readers is None:
                # Files without reader information go under the 'null' key.
                readers = ['null']

            if len(readers) > 1:
                shared_total += duration

            for reader_id in readers:
                talk_per_speaker[reader_id] = \
                    talk_per_speaker.get(reader_id, 0) + duration

    progress.finish()
    return talk_per_speaker, shared_total