in datasets/spd_datasets.py [0:0]
def get_callhome_files(path_to_callhome, langage="jpn"):
    """List the ``.mp3`` files of one language folder and their ``.cha`` paths.

    Audio files are searched in ``<path_to_callhome>/callhome/<langage>/``.
    For every audio file found, a sibling path with the same base name and a
    ``.cha`` extension is built in that same folder. These ``.cha`` paths are
    only constructed, never checked for existence.

    Args:
        path_to_callhome: Directory that contains the ``callhome`` folder.
        langage: Name of the language sub-folder to scan (the parameter
            spelling is kept as-is because callers may pass it by keyword).

    Returns:
        A tuple ``(audio_files, cha_files)``. Both are dicts with a single
        ``"data"`` key holding a list of paths; the two lists are aligned
        index by index and sorted by audio path.
    """
    # Imported locally so the function does not rely on a module-level import.
    import os

    language_dir = path_to_callhome + "/callhome/{}".format(langage)

    # glob's ordering depends on the file system; sorting keeps the result
    # reproducible across machines and runs.
    audio_files = {
        "data": sorted(glob.glob(language_dir + "/*.mp3")),
    }

    # Strip only the final extension (and use the OS-aware basename) so names
    # such as "0002.v2.mp3" map to "0002.v2.cha" rather than "0002.cha".
    cha_files = {
        subset: [
            "{}/{}.cha".format(
                language_dir,
                os.path.splitext(os.path.basename(audio_path))[0],
            )
            for audio_path in audio_paths
        ]
        for subset, audio_paths in audio_files.items()
    }
    return audio_files, cha_files