fn transform_range_multiple_bytes()

in tokenizers/src/tokenizer/normalizer.rs [1857:2281]


    fn transform_range_multiple_bytes() {
        // Exercises `transform_range` on a string made only of 4-byte characters.
        // After every edit the test checks the normalized text, the
        // normalized->original alignment table and, for most scenarios, the
        // original->normalized table and range lookups.
        const TEXT: &str = "𝔾𝕠𝕠𝕕";

        // Expands `(value, count)` pairs into a flat vector in which each value
        // appears `count` times in a row. The alignment tables below hold one
        // entry per byte, so they are naturally described as runs.
        fn repeat_each<T: Clone>(runs: &[(T, usize)]) -> Vec<T> {
            runs.iter()
                .flat_map(|(value, count)| std::iter::repeat(value.clone()).take(*count))
                .collect()
        }

        // Builds the expected value: only the normalized text and its
        // alignments differ from one scenario to the next.
        let expected = |normalized: &'static str, alignments| NormalizedString {
            original: TEXT.into(),
            normalized: normalized.into(),
            alignments,
            original_shift: 0,
        };

        let base = NormalizedString::from(TEXT);

        // Scenario: the first two characters collapse into a single 'G'.
        {
            let mut ns = base.clone();
            ns.transform_range(Range::Original(0..8), vec![('G', -1)], 0);
            assert_eq!(
                ns,
                expected(
                    "G𝕠𝕕",
                    repeat_each(&[((0, 4), 1), ((8, 12), 4), ((12, 16), 4)])
                )
            );
            assert_eq!(
                ns.alignments_original(),
                repeat_each(&[((0, 1), 4), ((1, 1), 4), ((1, 5), 4), ((5, 9), 4)])
            );
            assert_eq!(ns.get_range(Range::Original(0..8)).unwrap(), "G");
            assert_eq!(ns.get_range(Range::Original(0..4)).unwrap(), "G");
            assert_eq!(
                ns.get_range_original(Range::Original(0..4)).unwrap(),
                "𝔾"
            );
            assert_eq!(
                ns.get_range_original(Range::Original(0..8)).unwrap(),
                "𝔾𝕠"
            );
        }

        // Scenario: the two middle characters collapse into a single 'o'.
        {
            let mut ns = base.clone();
            ns.transform_range(Range::Original(4..12), vec![('o', -1)], 0);
            assert_eq!(
                ns,
                expected(
                    "𝔾o𝕕",
                    repeat_each(&[((0, 4), 4), ((4, 8), 1), ((12, 16), 4)])
                )
            );
            assert_eq!(
                ns.alignments_original(),
                repeat_each(&[((0, 4), 4), ((4, 5), 4), ((5, 5), 4), ((5, 9), 4)])
            );
        }

        // Scenario: the last character is replaced by 'd' and a '!' is appended,
        // so the tail shrinks from four bytes to two.
        {
            let mut ns = base.clone();
            ns.transform_range(Range::Original(12..), vec![('d', 0), ('!', 1)], 0);
            assert_eq!(
                ns,
                expected(
                    "𝔾𝕠𝕠d!",
                    repeat_each(&[((0, 4), 4), ((4, 8), 4), ((8, 12), 4), ((12, 16), 2)])
                )
            );
        }

        // Scenario: a '_' is inserted in front of the first character by
        // rewriting that character's range.
        {
            let mut ns = base.clone();
            ns.transform_range(Range::Original(0..4), vec![('_', 1), ('𝔾', 0)], 0);
            assert_eq!(
                ns,
                expected(
                    "_𝔾𝕠𝕠𝕕",
                    repeat_each(&[
                        ((0, 0), 1),
                        ((0, 4), 4),
                        ((4, 8), 4),
                        ((8, 12), 4),
                        ((12, 16), 4)
                    ])
                )
            );
            assert_eq!(
                ns.alignments_original(),
                repeat_each(&[((1, 5), 4), ((5, 9), 4), ((9, 13), 4), ((13, 17), 4)])
            );

            assert_eq!(ns.get_range(Range::Original(0..8)).unwrap(), "𝔾𝕠");
            assert_eq!(ns.get_range(Range::Original(0..4)).unwrap(), "𝔾");
            assert_eq!(
                ns.get_range_original(Range::Original(0..4)).unwrap(),
                "𝔾"
            );
            assert_eq!(
                ns.get_range_original(Range::Original(0..8)).unwrap(),
                "𝔾𝕠"
            );
        }

        // Scenario: the same insertion expressed through an empty range at
        // offset 0; the expectations are identical to the previous scenario.
        {
            let mut ns = base.clone();
            ns.transform_range(Range::Original(0..0), vec![('_', 1)], 0);
            assert_eq!(
                ns,
                expected(
                    "_𝔾𝕠𝕠𝕕",
                    repeat_each(&[
                        ((0, 0), 1),
                        ((0, 4), 4),
                        ((4, 8), 4),
                        ((8, 12), 4),
                        ((12, 16), 4)
                    ])
                )
            );
            assert_eq!(
                ns.alignments_original(),
                repeat_each(&[((1, 5), 4), ((5, 9), 4), ((9, 13), 4), ((13, 17), 4)])
            );

            assert_eq!(ns.get_range(Range::Original(0..8)).unwrap(), "𝔾𝕠");
            assert_eq!(ns.get_range(Range::Original(0..4)).unwrap(), "𝔾");
            assert_eq!(
                ns.get_range_original(Range::Original(0..4)).unwrap(),
                "𝔾"
            );
            assert_eq!(
                ns.get_range_original(Range::Original(0..8)).unwrap(),
                "𝔾𝕠"
            );
        }

        // Scenario: an 'o' is added right after the first character; the new
        // byte is attributed to that first character.
        {
            let mut ns = base.clone();
            ns.transform_range(Range::Original(0..4), vec![('𝔾', 0), ('o', 1)], 0);
            assert_eq!(
                ns,
                expected(
                    "𝔾o𝕠𝕠𝕕",
                    repeat_each(&[((0, 4), 5), ((4, 8), 4), ((8, 12), 4), ((12, 16), 4)])
                )
            );
            assert_eq!(
                ns.alignments_original(),
                repeat_each(&[((0, 5), 4), ((5, 9), 4), ((9, 13), 4), ((13, 17), 4)])
            );
            assert_eq!(ns.get_range(Range::Original(0..8)).unwrap(), "𝔾o𝕠");
            assert_eq!(ns.get_range(Range::Original(0..4)).unwrap(), "𝔾o");
            assert_eq!(
                ns.get_range_original(Range::Original(0..4)).unwrap(),
                "𝔾"
            );
            assert_eq!(
                ns.get_range_original(Range::Original(0..8)).unwrap(),
                "𝔾𝕠"
            );
        }

        // Scenario: three 'o' are added after the second character.
        {
            let mut ns = base.clone();
            ns.transform_range(
                Range::Original(4..8),
                vec![('𝕠', 0), ('o', 1), ('o', 1), ('o', 1)],
                0,
            );
            assert_eq!(
                ns,
                expected(
                    "𝔾𝕠ooo𝕠𝕕",
                    repeat_each(&[((0, 4), 4), ((4, 8), 7), ((8, 12), 4), ((12, 16), 4)])
                )
            );
            assert_eq!(
                ns.alignments_original(),
                repeat_each(&[((0, 4), 4), ((4, 11), 4), ((11, 15), 4), ((15, 19), 4)])
            );
        }

        // Scenario: a '!' is appended after the last character. This is the
        // final scenario, so `base` is consumed instead of cloned.
        {
            let mut ns = base;
            ns.transform_range(Range::Original(16..), vec![('!', 1)], 0);
            assert_eq!(
                ns,
                expected(
                    "𝔾𝕠𝕠𝕕!",
                    repeat_each(&[((0, 4), 4), ((4, 8), 4), ((8, 12), 4), ((12, 16), 5)])
                )
            );
            assert_eq!(
                ns.alignments_original(),
                repeat_each(&[((0, 4), 4), ((4, 8), 4), ((8, 12), 4), ((12, 17), 4)])
            );
        }
    }