def filter_data_by_id()

in recipes/joint_training_vox_populi/prepare_data/common_voice_to_wav2letter.py [0:0]


def filter_data_by_id(
    input_lst: List[FileInfo], to_filter: List[str]
) -> List[FileInfo]:
    """Keep only the entries of ``input_lst`` whose ``id_`` is in ``to_filter``.

    Neither argument is modified. Entries sharing the same ``id_`` are all
    kept, in the relative order they had in ``input_lst``.

    Args:
        input_lst: Entries to filter; each one must expose an ``id_`` attribute.
        to_filter: Ids to keep.

    Returns:
        A new list with the matching entries, sorted by ``id_`` in ascending
        order.

    Side effects:
        Prints the number of entries kept next to ``len(to_filter)``.
    """
    # A set gives constant-time membership tests and removes the need to sort
    # (and therefore reorder) the caller's lists.
    wanted_ids = set(to_filter)

    # sorted() is stable, so entries with equal ids keep their input order.
    out = sorted(
        (entry for entry in input_lst if entry.id_ in wanted_ids),
        key=lambda entry: entry.id_,
    )

    print(f"{len(out)} files out of {len(to_filter)}")

    return out