in recipes/data/fisher/utils.py [0:0]
def find_files(src):
    """Collect (audio, transcript) file pairs from Fisher corpus directories.

    Args:
        src: Comma-separated list of directories. Together they must contain
            every required sub-directory listed below. Part-1 audio
            directories are also accepted under the alternative name
            ``fisher_eng_tr_sp_d<N>`` instead of ``fe_03_p1_sph<N>``.
            Whitespace around each entry and empty entries (for example a
            trailing comma) are ignored.

    Returns:
        A list of ``(audio_path, transcript_path)`` tuples, one per ``.sph``
        file found. Files are paired by the part of the file name that
        precedes the first ``"."``.

    Raises:
        AssertionError: If a required sub-directory was not found in any of
            the source directories.
        KeyError: If an audio file has no transcript with the same key.
        OSError: If a source directory cannot be listed.
    """
    required_dirs = [
        "fe_03_p1_sph1",
        "fe_03_p1_sph3",
        "fe_03_p1_sph5",
        "fe_03_p1_sph7",
        "fe_03_p2_sph1",
        "fe_03_p2_sph3",
        "fe_03_p2_sph5",
        "fe_03_p2_sph7",
        "fe_03_p1_sph2",
        "fe_03_p1_sph4",
        "fe_03_p1_sph6",
        "fe_03_p2_sph2",
        "fe_03_p2_sph4",
        "fe_03_p2_sph6",
        "fe_03_p1_tran",
        "fe_03_p2_tran",
    ]

    # Map every accepted on-disk directory name to its canonical required
    # name, so each directory entry needs a single lookup instead of a scan
    # over all required names.
    accepted_names = {}
    for req_dir in required_dirs:
        accepted_names[req_dir] = req_dir
        new_style_req_dir = req_dir.replace("fe_03_p1_sph", "fisher_eng_tr_sp_d")
        accepted_names[new_style_req_dir] = req_dir

    dir_mapping = {}
    for src_dir in src.split(","):
        src_dir = src_dir.strip()
        if not src_dir:
            # Tolerate "a,b," or "a,,b" instead of failing in os.listdir("").
            continue
        for entry in os.listdir(src_dir):
            req_dir = accepted_names.get(entry)
            if req_dir is None:
                continue
            full_path = os.path.join(src_dir, entry)
            if os.path.isdir(full_path):
                # A later source directory overrides an earlier one.
                dir_mapping[req_dir] = full_path

    transcript_files = {}
    audio_files = {}
    for req_dir in required_dirs:
        if req_dir not in dir_mapping:
            # Raised explicitly (rather than via `assert`) so the check also
            # runs under `python -O`; the exception type is kept unchanged
            # for existing callers.
            raise AssertionError(
                "could not find the subdirectory {}".format(req_dir)
            )
        walk_root = dir_mapping[req_dir]
        # Decide by the required directory *name*, not by the full path:
        # a parent path that happens to contain "tran" must not redirect the
        # audio directories to a non-existent "data" sub-directory.
        if "tran" in req_dir:
            walk_root = os.path.join(walk_root, "data")
        for dirpath, _, filenames in os.walk(walk_root):
            for filename in filenames:
                key = filename.split(".")[0]
                if filename.startswith("fe_") and filename.endswith(".txt"):
                    transcript_files[key] = os.path.join(dirpath, filename)
                elif filename.endswith(".sph"):
                    audio_files[key] = os.path.join(dirpath, filename)

    return [(audio_files[k], transcript_files[k]) for k in audio_files]