align/data.py [71:82]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                    matching = regex.match(r'<s snum=([0-9]*)>(.*)</s>', line.strip())
                    assert matching is not None
                    idx = matching.group(1)
                    sent = matching.group(2).strip()
                    trg_id2s[idx] = sent
            src_sents = [src_id2s[key] for key in sorted(src_id2s.keys())]
            trg_sents = [trg_id2s[key] for key in sorted(trg_id2s.keys())]
            snum2idx = dict([(key, i) for i, key in enumerate(sorted(trg_id2s.keys()))])
            assert len(src_id2s) == len(trg_id2s)
            ground_truth = [list() for _ in src_id2s]
            raw_gt = open(os.path.join(path, langs, split, f'{split}.wa.nonullalign')).readlines()
            for line in raw_gt:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



align/data.py [106:117]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                matching = regex.match(r'<s snum=([0-9]*)>(.*)</s>', line.strip())
                assert matching is not None
                idx = matching.group(1)
                sent = matching.group(2).strip()
                trg_id2s[idx] = sent
            src_sents = [src_id2s[key] for key in sorted(src_id2s.keys())]
            trg_sents = [trg_id2s[key] for key in sorted(trg_id2s.keys())]
            snum2idx = dict([(key, i) for i, key in enumerate(sorted(trg_id2s.keys()))])
            assert len(src_id2s) == len(trg_id2s)
            ground_truth = [list() for _ in src_id2s]
            raw_gt = open(os.path.join(path, langs, split, f'{split}.wa.nonullalign')).readlines()
            for line in raw_gt:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



