def process_dir()

in data_preparation/split_librilight/puts_json.py [0:0]


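This function relies on module-level imports (`copy`, `json`) and on a `BookError` accumulator defined elsewhere in puts_json.py. The sketch below is an assumption about its shape, reconstructed from how the body uses it (`.add(...)` on the error sets, `ok += 1`); the actual class in the file may differ.

# Hypothetical reconstruction of the per-book error accumulator; inferred from
# usage below, not copied from the original file.
import copy
import json
from dataclasses import dataclass, field


@dataclass
class BookError:
    no_match_snr: set = field(default_factory=set)           # no SNR estimate for the file
    no_match_speaker: set = field(default_factory=set)       # reader could not be resolved
    fuzzy_matched_speaker: set = field(default_factory=set)  # matched only by name prefix
    test_speakers: set = field(default_factory=set)          # read by a held-out speaker
    ok: int = 0                                               # files written successfully
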
def process_dir(normalized_book_name, dir_name, name2meta, file_times, voice_activities, snr_table, test_speakers, extension='*.flac'):
    # The book must be covered by every lookup table before anything is read from them.
    assert normalized_book_name in name2meta
    assert normalized_book_name in snr_table, normalized_book_name
    assert normalized_book_name in voice_activities and normalized_book_name in file_times

    # Map each audio file name to its list of reader ids.
    speaker2file = dict(zip(name2meta[normalized_book_name]['speaker_data']['names'],
                            name2meta[normalized_book_name]['speaker_data']['readers']))

    # Per-book accumulator of skipped files and success counts.
    errors = BookError()

    for file_name in dir_name.glob(extension):
        fname = file_name.stem
        assert fname.endswith('_64kb')
        fname = fname[:-5]  # strip the '_64kb' suffix

        # Files without an SNR estimate are skipped; the VAD and duration tables
        # must cover every remaining file.
        if fname not in snr_table[normalized_book_name]:
            errors.no_match_snr.add(fname)
            continue

        assert fname in voice_activities[normalized_book_name]
        assert fname in file_times[normalized_book_name]

        if fname in speaker2file:
            speakers = speaker2file[fname]
        else:
            # Fall back to a prefix match when the exact file name is absent.
            match = [z for z in speaker2file.keys() if z.startswith(fname)]
            if len(match) != 1:
                errors.no_match_speaker.add(fname)
                continue
            errors.fuzzy_matched_speaker.add(fname)
            speakers = speaker2file[match[0]]

        # Skip files with no reader or with more than one reader.
        if speakers is None:
            errors.no_match_speaker.add(fname)
            continue

        if len(speakers) != 1:
            errors.no_match_speaker.add(fname)
            continue

        speaker = speakers[0]
        # Exclude files read by held-out (test) speakers.
        if int(speaker) in test_speakers:
            errors.test_speakers.add(fname)
            continue

        errors.ok += 1

        # Write the per-file metadata JSON next to the audio file.
        target = file_name.parent / (file_name.stem + '.json')
        data = copy.deepcopy(name2meta[normalized_book_name])
        del data['speaker_data']
        data['speaker'] = speaker
        data['file_length_sec'] = file_times[normalized_book_name][fname]
        # Drop book-level fields that do not apply per file and expose the
        # remaining book metadata under 'book_meta'.
        del data['meta']['totaltime']
        del data['meta']['trancription_status']
        data['book_meta'] = data.pop('meta')

        assert fname in snr_table[normalized_book_name], (
            fname, normalized_book_name)
        data['snr'] = round(snr_table[normalized_book_name][fname], 4)
        # Voice-activity segments are (start, end) pairs, rounded to 4 decimals.
        data['voice_activity'] = [(round(x[0], 4), round(x[1], 4))
                                  for x in voice_activities[normalized_book_name][fname]]

        with open(target, 'w') as fout:
            out = json.dumps(data, indent=1)
            fout.write(out)

    return errors
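
A minimal sketch of how process_dir might be driven over a LibriLight-style directory tree. The root path, the directory layout, and the way the lookup tables (name2meta, file_times, voice_activities, snr_table, test_speakers) are built are assumptions for illustration; the surrounding script constructs them elsewhere.

# Hypothetical driver loop; the root path and the table stubs are placeholders,
# not the actual logic of puts_json.py.
from pathlib import Path

# Assumed to be built elsewhere in the script; empty stubs keep the sketch runnable.
name2meta, file_times, voice_activities, snr_table = {}, {}, {}, {}
test_speakers = set()

root = Path('librilight/books')  # assumed layout: one sub-directory per normalized book name

for book_dir in sorted(p for p in root.iterdir() if p.is_dir()):
    book = book_dir.name
    if not all(book in t for t in (name2meta, snr_table, voice_activities, file_times)):
        continue  # process_dir asserts that every table covers the book
    errors = process_dir(book, book_dir, name2meta, file_times,
                         voice_activities, snr_table, test_speakers)
    print(book, 'ok:', errors.ok,
          'no snr:', len(errors.no_match_snr),
          'speaker problems:', len(errors.no_match_speaker),
          'held-out speakers:', len(errors.test_speakers))

For every file that passes the checks, the JSON written by process_dir sits next to the .flac and carries speaker, file_length_sec, snr, voice_activity, and the remaining book-level fields under book_meta.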