fn transform_range_single_bytes()

in tokenizers/src/tokenizer/normalizer.rs [1511:1854]


    fn transform_range_single_bytes() {
        let s = NormalizedString::from("Hello friend");

        // Removing at the beginning
        let mut current = s.clone();
        current.transform_range(Range::Original(0..4), vec![('Y', 0)], 3);
        assert_eq!(
            current,
            NormalizedString {
                original: "Hello friend".into(),
                normalized: "Yo friend".into(),
                alignments: vec![
                    (3, 4),
                    (4, 5),
                    (5, 6),
                    (6, 7),
                    (7, 8),
                    (8, 9),
                    (9, 10),
                    (10, 11),
                    (11, 12)
                ],
                original_shift: 0,
            }
        );

        assert_eq!(
            current.alignments_original(),
            vec![
                (0, 0),
                (0, 0),
                (0, 0),
                (0, 1),
                (1, 2),
                (2, 3),
                (3, 4),
                (4, 5),
                (5, 6),
                (6, 7),
                (7, 8),
                (8, 9)
            ]
        );

        // Removing in the middle
        let mut current = s.clone();
        current.transform_range(
            Range::Original(3..10),
            vec![('_', 0), ('F', 0), ('R', -2)],
            2,
        );
        assert_eq!(
            current,
            NormalizedString {
                original: "Hello friend".into(),
                normalized: "Hel_FRnd".into(),
                alignments: vec![
                    (0, 1),
                    (1, 2),
                    (2, 3),
                    (5, 6),
                    (6, 7),
                    (7, 8),
                    (10, 11),
                    (11, 12)
                ],
                original_shift: 0,
            }
        );

        assert_eq!(
            current.alignments_original(),
            vec![
                (0, 1),
                (1, 2),
                (2, 3),
                (3, 3),
                (3, 3),
                (3, 4),
                (4, 5),
                (5, 6),
                (6, 6),
                (6, 6),
                (6, 7),
                (7, 8)
            ]
        );

        // Removing at the end
        let mut current = s.clone();
        current.transform_range(Range::Original(5..), vec![('_', 0), ('F', -5)], 0);
        assert_eq!(
            current,
            NormalizedString {
                original: "Hello friend".into(),
                normalized: "Hello_F".into(),
                alignments: vec![(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7)],
                original_shift: 0,
            }
        );
        assert_eq!(
            current.alignments_original(),
            vec![
                (0, 1),
                (1, 2),
                (2, 3),
                (3, 4),
                (4, 5),
                (5, 6),
                (6, 7),
                (7, 7),
                (7, 7),
                (7, 7),
                (7, 7),
                (7, 7)
            ]
        );

        // Adding at the beginning
        let mut current = s.clone();
        current.transform_range(Range::Original(0..1), vec![('H', 1), ('H', 0)], 0);
        assert_eq!(
            current,
            NormalizedString {
                original: "Hello friend".into(),
                normalized: "HHello friend".into(),
                alignments: vec![
                    (0, 0),
                    (0, 1),
                    (1, 2),
                    (2, 3),
                    (3, 4),
                    (4, 5),
                    (5, 6),
                    (6, 7),
                    (7, 8),
                    (8, 9),
                    (9, 10),
                    (10, 11),
                    (11, 12)
                ],
                original_shift: 0,
            }
        );
        assert_eq!(
            current.alignments_original(),
            vec![
                (1, 2),
                (2, 3),
                (3, 4),
                (4, 5),
                (5, 6),
                (6, 7),
                (7, 8),
                (8, 9),
                (9, 10),
                (10, 11),
                (11, 12),
                (12, 13)
            ]
        );
        // Equivalent to the previous one
        let mut current = s.clone();
        current.transform_range(Range::Original(0..0), vec![('H', 1)], 0);
        assert_eq!(
            current,
            NormalizedString {
                original: "Hello friend".into(),
                normalized: "HHello friend".into(),
                alignments: vec![
                    (0, 0),
                    (0, 1),
                    (1, 2),
                    (2, 3),
                    (3, 4),
                    (4, 5),
                    (5, 6),
                    (6, 7),
                    (7, 8),
                    (8, 9),
                    (9, 10),
                    (10, 11),
                    (11, 12)
                ],
                original_shift: 0,
            }
        );
        assert_eq!(
            current.alignments_original(),
            vec![
                (1, 2),
                (2, 3),
                (3, 4),
                (4, 5),
                (5, 6),
                (6, 7),
                (7, 8),
                (8, 9),
                (9, 10),
                (10, 11),
                (11, 12),
                (12, 13)
            ]
        );
        // Adding as part of the first character
        let mut current = s.clone();
        current.transform_range(Range::Original(0..1), vec![('H', 0), ('H', 1)], 0);
        assert_eq!(
            current,
            NormalizedString {
                original: "Hello friend".into(),
                normalized: "HHello friend".into(),
                alignments: vec![
                    (0, 1),
                    (0, 1),
                    (1, 2),
                    (2, 3),
                    (3, 4),
                    (4, 5),
                    (5, 6),
                    (6, 7),
                    (7, 8),
                    (8, 9),
                    (9, 10),
                    (10, 11),
                    (11, 12)
                ],
                original_shift: 0,
            }
        );

        assert_eq!(
            current.alignments_original(),
            vec![
                (0, 2),
                (2, 3),
                (3, 4),
                (4, 5),
                (5, 6),
                (6, 7),
                (7, 8),
                (8, 9),
                (9, 10),
                (10, 11),
                (11, 12),
                (12, 13)
            ]
        );

        // Adding in the middle
        let mut current = s.clone();
        current.transform_range(
            Range::Original(5..6),
            vec![('_', 0), ('m', 1), ('y', 1), ('_', 1)],
            0,
        );
        assert_eq!(
            current,
            NormalizedString {
                original: "Hello friend".into(),
                normalized: "Hello_my_friend".into(),
                alignments: vec![
                    (0, 1),
                    (1, 2),
                    (2, 3),
                    (3, 4),
                    (4, 5),
                    (5, 6),
                    (5, 6),
                    (5, 6),
                    (5, 6),
                    (6, 7),
                    (7, 8),
                    (8, 9),
                    (9, 10),
                    (10, 11),
                    (11, 12)
                ],
                original_shift: 0,
            }
        );
        assert_eq!(
            current.alignments_original(),
            vec![
                (0, 1),
                (1, 2),
                (2, 3),
                (3, 4),
                (4, 5),
                (5, 9),
                (9, 10),
                (10, 11),
                (11, 12),
                (12, 13),
                (13, 14),
                (14, 15)
            ]
        );

        // Adding at the end
        let mut current = s;
        current.transform_range(Range::Original(11..), vec![('d', 0), ('_', 1), ('!', 1)], 0);
        assert_eq!(
            current,
            NormalizedString {
                original: "Hello friend".into(),
                normalized: "Hello friend_!".into(),
                alignments: vec![
                    (0, 1),
                    (1, 2),
                    (2, 3),
                    (3, 4),
                    (4, 5),
                    (5, 6),
                    (6, 7),
                    (7, 8),
                    (8, 9),
                    (9, 10),
                    (10, 11),
                    (11, 12),
                    (11, 12),
                    (11, 12)
                ],
                original_shift: 0,
            }
        );
        assert_eq!(
            current.alignments_original(),
            vec![
                (0, 1),
                (1, 2),
                (2, 3),
                (3, 4),
                (4, 5),
                (5, 6),
                (6, 7),
                (7, 8),
                (8, 9),
                (9, 10),
                (10, 11),
                (11, 14)
            ]
        );
    }