def get_ami_files()

in datasets/spd_datasets.py [0:0]


def get_ami_files(path_to_ami, setup="only_words", hm_type="ihm"):
    """Collect paired audio and RTTM file paths for each AMI subset.

    RTTM files are globbed from
    ``<path_to_ami>/AMI-diarization-setup/<setup>/rttms/<split>/*.rttm``
    for the ``train``, ``dev`` and ``test`` directories. For every RTTM file
    the meeting id is the file name up to its first ``.``, and the matching
    audio file is expected at
    ``<path_to_ami>/AMI-diarization-setup/pyannote/amicorpus/<meeting>/audio/<meeting>.<suffix>.wav``.
    RTTM files whose audio file does not exist on disk are dropped, so the
    two returned lists of a subset always have the same length and order.

    Args:
        path_to_ami: Directory that contains ``AMI-diarization-setup``.
        setup: RTTM variant directory, either ``"only_words"`` or ``"mini"``.
        hm_type: Audio variant. ``"ihm"`` selects ``*.Mix-Headset.wav`` and
            ``"sdm"`` selects ``*.Array1-01.wav``.

    Returns:
        A tuple ``(audio_files, rttm_files)``. Both are dicts with the keys
        ``"train"``, ``"validation"`` and ``"test"`` mapping to lists of
        paths; ``audio_files[s][i]`` is the audio for ``rttm_files[s][i]``.

    Raises:
        AssertionError: If ``setup`` or ``hm_type`` is not a supported value.
    """
    # Kept as assertions so callers that rely on AssertionError keep working.
    assert setup in ["only_words", "mini"], "unsupported setup: {!r}".format(setup)
    assert hm_type in ["ihm", "sdm"], "unsupported hm_type: {!r}".format(hm_type)

    # The two audio variants differ only in the wav file suffix.
    audio_suffix = {"ihm": "Mix-Headset", "sdm": "Array1-01"}[hm_type]
    setup_root = path_to_ami + "/AMI-diarization-setup"

    # The "validation" subset is stored in the "dev" directory on disk.
    subset_dirs = {"train": "train", "validation": "dev", "test": "test"}

    audio_files = {}
    rttm_files = {}
    for subset, dirname in subset_dirs.items():
        kept_audio = []
        kept_rttm = []
        pattern = setup_root + "/{}/rttms/{}/*.rttm".format(setup, dirname)
        for rttm in glob.glob(pattern):
            # basename handles whichever separator glob produced on this OS.
            meeting = os.path.basename(rttm).split(".")[0]
            path = setup_root + "/pyannote/amicorpus/{}/audio/{}.{}.wav".format(
                meeting, meeting, audio_suffix
            )
            # Keep only RTTM files that have their audio available.
            if os.path.exists(path):
                kept_audio.append(path)
                kept_rttm.append(rttm)
        audio_files[subset] = kept_audio
        rttm_files[subset] = kept_rttm

    return audio_files, rttm_files