def validate()

in tasks/CCMatrix/dl_cc_matrix.py [0:0]


def validate(src_file: Path, trg_file: Path) -> None:
    """Check that every segment in one file has a counterpart in the other.

    The two files are walked in lockstep, merge-join style: each line is
    parsed into a ``SimpleBitext`` and the side with the smaller ``line_no``
    is advanced; when both sides carry the same ``line_no`` they are counted
    as a pair and both advance.

    NOTE(review): this walk only pairs lines correctly if each file is
    ordered by ascending ``line_no`` -- confirm against the producer.

    The outcome is only logged (info on success, error on mismatch); nothing
    is returned or raised by the comparison itself.

    Args:
        src_file: Path of the source-side file, one parsable record per line.
        trg_file: Path of the target-side file, one parsable record per line.
    """
    lines_src, lines_trg, found_pairs = 0, 0, 0
    parser = get_typed_parser(SimpleBitext)
    with open(src_file) as src_f, open(trg_file) as trg_f:
        src_l = src_f.readline()
        trg_l = trg_f.readline()
        while src_l and trg_l:
            src: SimpleBitext = parser(src_l)
            trg: SimpleBitext = parser(trg_l)
            # Advance whichever side is behind; on a tie both sides advance.
            if src.line_no <= trg.line_no:
                lines_src += 1
                src_l = src_f.readline()
            if trg.line_no <= src.line_no:
                lines_trg += 1
                trg_l = trg_f.readline()
            if trg.line_no == src.line_no:
                found_pairs += 1

        # The loop above stops as soon as one file is exhausted. Whatever is
        # left in the other file has no possible partner, but it must still
        # be counted; otherwise a file with extra trailing segments would be
        # reported as fully paired.
        while src_l:
            lines_src += 1
            src_l = src_f.readline()
        while trg_l:
            lines_trg += 1
            trg_l = trg_f.readline()

    if found_pairs == lines_src and found_pairs == lines_trg:
        logging.info(
            f"Validated {src_file} and {trg_file}. Found {found_pairs} bitexts."
        )
    else:
        logging.error(
            f"Validated {src_file} and {trg_file}. "
            f"Found {found_pairs} bitexts, from {lines_src} in {src_file} and {lines_trg} in {trg_file}"
        )