def get_session_ids()

in voxpopuli/segmentation/cut_with_align_files.py [0:0]


def get_session_ids(root_align: Path, root_wer: Path, lang: str) -> Set[str]:
    """Return the ids that have both an alignment file and a WER file.

    An id is the part of a file name that precedes the language-specific
    suffix:

    * ``<id>_<lang>_align_wav2letter.txt`` directly inside ``root_align``
    * ``<id>_<lang>_wer_no_lm_wav2letter.json`` directly inside ``root_wer``

    Only ids accepted by ``is_id_valid`` are kept on each side.

    Args:
        root_align: Directory searched (non-recursively) for alignment files.
        root_wer: Directory searched (non-recursively) for WER files.
        lang: Language code embedded in the file names.

    Returns:
        The set of ids present, and valid, in both directories.
    """
    suffix_align = f"_{lang}_align_wav2letter.txt"
    suffix_wer = f"_{lang}_wer_no_lm_wav2letter.json"

    def _valid_ids(root: Path, suffix: str) -> Set[str]:
        # Strip the suffix by its real length instead of a hard-coded count,
        # so the id is extracted correctly whatever the length of ``lang``.
        cut = len(suffix)
        candidates = (x.name[:-cut] for x in root.glob(f"*{suffix}"))
        return {
            session_id for session_id in candidates if is_id_valid(session_id)
        }

    return _valid_ids(root_align, suffix_align) & _valid_ids(root_wer, suffix_wer)