in data_preparation/split_librilight/puts_json.py [0:0]
def process_dir(normalized_book_name, dir_name, name2meta, file_times,
                voice_activities, snr_table, test_speakers, extension='*.flac'):
    """Write a JSON metadata sidecar next to each usable audio file of a book.

    Every file in ``dir_name`` matching ``extension`` must have a stem ending
    in ``_64kb``; the stem without that suffix is the key used in all lookup
    tables. A file is skipped (and recorded in the returned ``BookError``)
    when:

    * it has no SNR entry (``no_match_snr``);
    * no speaker entry can be found for it, exactly or by a unique prefix
      match, or the entry is ``None`` or does not list exactly one speaker
      (``no_match_speaker``);
    * its single speaker is in ``test_speakers`` (``test_speakers``).

    Files resolved through the prefix match are additionally recorded in
    ``fuzzy_matched_speaker``, whether or not they are written afterwards.
    For every remaining file ``errors.ok`` is incremented and
    ``<stem>.json`` is written beside the audio file.

    Args:
        normalized_book_name: Key of the book in ``name2meta``,
            ``file_times``, ``voice_activities`` and ``snr_table``.
        dir_name: Directory object exposing ``glob`` (e.g. ``pathlib.Path``).
        name2meta: Per-book metadata. Each entry is read for
            ``['speaker_data']['names']``, ``['speaker_data']['readers']``
            and ``['meta']`` (which must contain ``'totaltime'`` and
            ``'trancription_status'``).
        file_times: ``book -> file -> value`` stored as ``file_length_sec``.
        voice_activities: ``book -> file -> iterable`` of items whose first
            two elements are rounded to 4 decimals and stored as
            ``voice_activity``.
        snr_table: ``book -> file -> number``, rounded to 4 decimals and
            stored as ``snr``.
        test_speakers: Container of integer speaker ids to exclude.
        extension: Glob pattern selecting the audio files.

    Returns:
        The ``BookError`` instance holding the per-category bookkeeping.

    Raises:
        AssertionError: If the book is missing from any table, a file stem
            does not end in ``_64kb``, or a file with an SNR entry is
            missing from ``voice_activities`` or ``file_times``.
        KeyError: If the expected metadata keys are absent.
    """
    # Check the preconditions before the first lookup; previously the
    # name2meta assertion ran after the book had already been indexed and
    # therefore could never fire.
    assert normalized_book_name in name2meta, normalized_book_name
    assert normalized_book_name in snr_table, normalized_book_name
    assert normalized_book_name in voice_activities and normalized_book_name in file_times

    # Per-book tables are loop-invariant, so resolve them once.
    book_meta = name2meta[normalized_book_name]
    book_snr = snr_table[normalized_book_name]
    book_voice = voice_activities[normalized_book_name]
    book_times = file_times[normalized_book_name]

    speaker_data = book_meta['speaker_data']
    # Maps a file name to the readers entry listed for it.
    file2speakers = dict(zip(speaker_data['names'], speaker_data['readers']))

    errors = BookError()

    for file_name in dir_name.glob(extension):
        stem = file_name.stem
        assert stem.endswith('_64kb'), stem
        fname = stem[:-5]  # cut _64kb

        if fname not in book_snr:
            errors.no_match_snr.add(fname)
            continue
        assert fname in book_voice
        assert fname in book_times

        if fname in file2speakers:
            speakers = file2speakers[fname]
        else:
            # Fall back to a prefix match, accepted only when unambiguous.
            candidates = [name for name in file2speakers
                          if name.startswith(fname)]
            if len(candidates) != 1:
                errors.no_match_speaker.add(fname)
                continue
            errors.fuzzy_matched_speaker.add(fname)
            speakers = file2speakers[candidates[0]]

        # Only files attributed to exactly one speaker are usable.
        if speakers is None or len(speakers) != 1:
            errors.no_match_speaker.add(fname)
            continue

        speaker = speakers[0]
        if int(speaker) in test_speakers:
            errors.test_speakers.add(fname)
            continue

        errors.ok += 1

        # Work on a deep copy so the shared book metadata is never mutated.
        data = copy.deepcopy(book_meta)
        del data['speaker_data']
        data['speaker'] = speaker
        data['file_length_sec'] = book_times[fname]

        # Move 'meta' to 'book_meta', dropping the two book-level fields that
        # are not written to the per-file record.
        meta = data.pop('meta')
        del meta['totaltime']
        del meta['trancription_status']
        data['book_meta'] = meta

        data['snr'] = round(book_snr[fname], 4)
        data['voice_activity'] = [(round(x[0], 4), round(x[1], 4))
                                  for x in book_voice[fname]]

        # Serialise before opening the target so a serialisation failure
        # cannot leave a truncated, empty sidecar behind.
        out = json.dumps(data, indent=1)
        target = file_name.parent / (stem + '.json')
        with open(target, 'w') as fout:
            fout.write(out)

    return errors