in voxpopuli/segmentation/cut_with_align_files.py [0:0]
def get_session_ids(root_align: Path, root_wer: Path, lang: str) -> Set[str]:
    """Return the IDs that have both an alignment file and a WER file.

    An ID is the part of a file name that precedes the language-specific
    suffix: ``_{lang}_align_wav2letter.txt`` for files directly under
    ``root_align`` and ``_{lang}_wer_no_lm_wav2letter.json`` for files
    directly under ``root_wer``. IDs rejected by ``is_id_valid`` are ignored.

    Args:
        root_align: Directory holding the alignment ``.txt`` files.
        root_wer: Directory holding the WER ``.json`` files.
        lang: Language code embedded in the file names.

    Returns:
        The set of valid IDs present in both directories for ``lang``.
    """
    align_suffix = f"_{lang}_align_wav2letter.txt"
    wer_suffix = f"_{lang}_wer_no_lm_wav2letter.json"

    def _valid_ids(root: Path, suffix: str) -> Set[str]:
        # Every name matched by the glob ends with ``suffix``, so dropping
        # exactly ``len(suffix)`` characters leaves the ID whatever the
        # length of ``lang`` (fixed offsets only fit two-letter codes).
        candidates = (
            path.name[: -len(suffix)] for path in root.glob(f"*{suffix}")
        )
        return {
            session_id for session_id in candidates if is_id_valid(session_id)
        }

    ids_align = _valid_ids(root_align, align_suffix)
    ids_wer = _valid_ids(root_wer, wer_suffix)
    return ids_align & ids_wer