in tasks/CCMatrix/dl_cc_matrix.py [0:0]
def transpose_file(outdir: Path, file: Path) -> None:
    """Split one bitext file into one output file per language pair.

    Every line of ``file`` is parsed as a ``Bitext``. Its ``line_no``,
    ``score`` and ``text`` fields are written, tab-separated, to
    ``outdir / f"{lang_pair}_{file.name}"``. Because ``lang_pair`` must
    contain a ``/`` (``src-trg/src`` or ``src-trg/trg``), that path resolves
    to ``outdir/<src-trg>/<side>_<file.name>``.

    When the whole file has been processed, a sentinel file next to the input
    (same name, suffix replaced by ``.transposed``) is written with the
    output paths, one per line. If that sentinel already exists the function
    returns immediately without reading ``file``.

    Args:
        outdir: Root directory under which the per-language-pair files go.
        file: Input file to read; opened with ``open_read``.

    Raises:
        AssertionError: If a line carries a language pair without ``/``.
    """
    sentinel_file = file.with_suffix(".transposed")
    if sentinel_file.exists():
        # A previous run already finished this file: nothing to do.
        return

    outputs: Dict[str, FileWriterWithTmp] = {}
    parser = get_typed_parser(Bitext)
    success = False
    try:
        for line in open_read(file):
            bt: Bitext = parser(line)
            lang_pair = bt.lang_pair
            writer = outputs.get(lang_pair)
            if writer is None:
                # Raised explicitly instead of via `assert` so the check is
                # not stripped by `python -O`; the exception type is kept so
                # callers observe the same error as before.
                if "/" not in lang_pair:
                    raise AssertionError(
                        f"Invalid lang pair '{lang_pair}' should be 'src-trg/src' or 'src-trg/trg'"
                    )
                # Creates `outdir/<src-trg>/<side>/`, which as a side effect
                # creates `outdir/<src-trg>/`, the directory the output file
                # below lives in.
                # NOTE(review): the leaf `<side>/` directory is not used by
                # this function; kept in case other code expects it.
                (outdir / f"{lang_pair}").mkdir(exist_ok=True, parents=True)
                writer = FileWriterWithTmp(outdir / f"{lang_pair}_{file.name}")
                outputs[lang_pair] = writer
            simple_bt = SimpleBitext(bt.line_no, bt.score, bt.text)
            print(*simple_bt, sep="\t", file=writer)
        success = True
    finally:
        # Close every writer even if one close() fails, so that no writer is
        # left open. Each writer is told whether the run succeeded
        # (presumably to commit or discard its temporary file - confirm
        # against FileWriterWithTmp). The first close() error is re-raised
        # once all writers have been visited.
        first_error = None
        for writer in outputs.values():
            try:
                writer.close(success)
            except Exception as err:
                if first_error is None:
                    first_error = err
        if first_error is not None:
            raise first_error
        if success:
            # Only mark the input as done when everything was written.
            sentinel_file.write_text(
                "\n".join(str(writer.file) for writer in outputs.values())
            )