def find_files()

in recipes/data/fisher/utils.py [0:0]


def find_files(src):
    """Locate matching audio/transcript file pairs under the given directories.

    Args:
        src: Comma-separated list of directories. The immediate children of
            each one are searched for the required subdirectories
            (``fe_03_p{1,2}_sph{1..7}`` and ``fe_03_p{1,2}_tran``). A
            ``fe_03_p1_sphN`` directory may alternatively be named
            ``fisher_eng_tr_sp_dN``.

    Returns:
        A list of ``(audio_path, transcript_path)`` tuples, one per ``.sph``
        file found. Files are paired by the part of the file name that
        precedes the first ``"."``.

    Raises:
        AssertionError: If a required subdirectory is not found in any of
            the source directories.
        KeyError: If a ``.sph`` file has no transcript with the same key.
    """
    required_dirs = [
        "fe_03_p1_sph1",
        "fe_03_p1_sph3",
        "fe_03_p1_sph5",
        "fe_03_p1_sph7",
        "fe_03_p2_sph1",
        "fe_03_p2_sph3",
        "fe_03_p2_sph5",
        "fe_03_p2_sph7",
        "fe_03_p1_sph2",
        "fe_03_p1_sph4",
        "fe_03_p1_sph6",
        "fe_03_p2_sph2",
        "fe_03_p2_sph4",
        "fe_03_p2_sph6",
        "fe_03_p1_tran",
        "fe_03_p2_tran",
    ]

    # Map every accepted on-disk name (canonical or alternative) to the
    # canonical required name, so each directory entry needs one lookup.
    accepted_names = {}
    for req_dir in required_dirs:
        accepted_names[req_dir] = req_dir
        alt_name = req_dir.replace("fe_03_p1_sph", "fisher_eng_tr_sp_d")
        accepted_names[alt_name] = req_dir

    dir_mapping = {}
    for src_dir in src.split(","):
        for entry in os.listdir(src_dir):
            req_dir = accepted_names.get(entry)
            if req_dir is None:
                continue
            entry_path = os.path.join(src_dir, entry)
            if os.path.isdir(entry_path):
                # A later source directory overrides an earlier match.
                dir_mapping[req_dir] = entry_path

    transcript_files = {}
    audio_files = {}
    for req_dir in required_dirs:
        if req_dir not in dir_mapping:
            # Raised explicitly (instead of `assert`) so the check still runs
            # under `python -O`; the exception type and message are unchanged.
            raise AssertionError(
                "could not find the subdirectory {}".format(req_dir)
            )
        search_root = dir_mapping[req_dir]
        # Decide from the canonical directory name, not the resolved path:
        # a parent directory containing "tran" must not redirect the audio
        # directories into a non-existent "data" subfolder.
        if req_dir.endswith("_tran"):
            search_root = os.path.join(search_root, "data")
        for dirpath, _, filenames in os.walk(search_root):
            for filename in filenames:
                key = filename.split(".")[0]
                if filename.startswith("fe_") and filename.endswith(".txt"):
                    transcript_files[key] = os.path.join(dirpath, filename)
                elif filename.endswith(".sph"):
                    audio_files[key] = os.path.join(dirpath, filename)

    return [(audio_files[k], transcript_files[k]) for k in audio_files]