fn added_characters_alignment()

in tokenizers/src/tokenizer/normalizer.rs [1236:1286]


    fn added_characters_alignment() {
        // Checks the alignments recorded when a transformation inserts new
        // characters: every character whose code point is above 0x4E00 gets a
        // space on each side, everything else is passed through unchanged.
        let mut padded = NormalizedString::from("野口 No");

        // Work from an owned copy of the current text so the change iterator
        // does not borrow `padded` while `transform` mutates it.
        let snapshot = padded.get().to_owned();
        let changes = snapshot.chars().flat_map(|ch| {
            if (ch as usize) <= 0x4E00 {
                vec![(ch, 0)]
            } else {
                vec![(' ', 0), (ch, 1), (' ', 1)]
            }
        });
        padded.transform(changes, 0);

        // One entry per byte of the expected normalized text (13 bytes): each
        // padded ideograph (space + 3 bytes + space) yields five entries that
        // all point at that ideograph's 3-byte range in the original.
        let mut normalized_alignments = vec![(0, 3); 5];
        normalized_alignments.extend(vec![(3, 6); 5]);
        normalized_alignments.extend(vec![(6, 7), (7, 8), (8, 9)]);

        let expected = NormalizedString {
            original: "野口 No".into(),
            normalized: " 野  口  No".into(),
            alignments: normalized_alignments,
            original_shift: 0,
        };
        assert_eq!(padded, expected);

        // One entry per byte of the original text (9 bytes): each 3-byte
        // ideograph is expected to map to its 5-byte padded range.
        let mut original_alignments = vec![(0, 5); 3];
        original_alignments.extend(vec![(5, 10); 3]);
        original_alignments.extend(vec![(10, 11), (11, 12), (12, 13)]);

        assert_eq!(padded.alignments_original(), original_alignments);
    }