in recipes/joint_training_vox_populi/prepare_data/common_voice_to_wav2letter.py [0:0]
def filter_data_by_id(input_lst: List[FileInfo], to_filter: List[str]):
    """Keep the entries of ``input_lst`` whose ``id_`` is listed in ``to_filter``.

    Neither argument is modified: the entries are sorted into a new list and
    the requested ids are looked up through a set, so the caller's lists keep
    their original ordering.

    Args:
        input_lst: Entries to filter; each one must expose an ``id_``
            attribute.
        to_filter: Ids of the entries to keep. Duplicated ids are harmless.

    Returns:
        A new list holding the matching entries, sorted by ``id_``. Entries
        sharing the same ``id_`` are all kept, in their input order.
    """
    wanted_ids = set(to_filter)
    out = [
        entry
        for entry in sorted(input_lst, key=lambda x: x.id_)
        if entry.id_ in wanted_ids
    ]
    # The denominator is the number of requested ids as given (duplicates
    # included), not the number of distinct ids.
    print(f"{len(out)} files out of {len(to_filter)}")
    return out