fn like_escape()

in arrow-string/src/like.rs [2118:2356]


    /// Table-driven check of backslash-escape handling in `like`, `ilike`,
    /// `nlike` and `nilike`.
    ///
    /// Each case is `(value, pattern, expected)`. For every case, every string
    /// type (`Utf8`, `LargeUtf8`, `Utf8View`) and every value/pattern datum pair
    /// produced by `make_datums`, the test asserts that `like` and `ilike`
    /// return `expected` while `nlike` and `nilike` return its negation.
    ///
    /// All literals are raw strings, so every `\` in the table is exactly one
    /// backslash character in the value or pattern.
    fn like_escape() {
        // (value, pattern, expected)
        let test_cases = vec![
            // Empty pattern
            (r"", r"", true),
            (r"\", r"", false),
            // Sole (dangling) escape (some engines consider this invalid pattern)
            (r"", r"\", false),
            (r"\", r"\", true),
            (r"\\", r"\", false),
            (r"a", r"\", false),
            (r"\a", r"\", false),
            (r"\\a", r"\", false),
            // Sole escape
            (r"", r"\\", false),
            (r"\", r"\\", true),
            (r"\\", r"\\", false),
            (r"a", r"\\", false),
            (r"\a", r"\\", false),
            (r"\\a", r"\\", false),
            // Sole escape and dangling escape
            (r"", r"\\\", false),
            (r"\", r"\\\", false),
            (r"\\", r"\\\", true),
            (r"\\\", r"\\\", false),
            (r"\\\\", r"\\\", false),
            (r"a", r"\\\", false),
            (r"\a", r"\\\", false),
            (r"\\a", r"\\\", false),
            // Sole two escapes
            (r"", r"\\\\", false),
            (r"\", r"\\\\", false),
            (r"\\", r"\\\\", true),
            (r"\\\", r"\\\\", false),
            (r"\\\\", r"\\\\", false),
            (r"\\\\\", r"\\\\", false),
            (r"a", r"\\\\", false),
            (r"\a", r"\\\\", false),
            (r"\\a", r"\\\\", false),
            // Escaped non-wildcard
            (r"", r"\a", false),
            (r"\", r"\a", false),
            (r"\\", r"\a", false),
            (r"a", r"\a", true),
            (r"\a", r"\a", false),
            (r"\\a", r"\a", false),
            // Escaped _ wildcard
            (r"", r"\_", false),
            (r"\", r"\_", false),
            (r"\\", r"\_", false),
            (r"a", r"\_", false),
            (r"_", r"\_", true),
            (r"%", r"\_", false),
            (r"\a", r"\_", false),
            (r"\\a", r"\_", false),
            (r"\_", r"\_", false),
            (r"\\_", r"\_", false),
            // Escaped % wildcard
            (r"", r"\%", false),
            (r"\", r"\%", false),
            (r"\\", r"\%", false),
            (r"a", r"\%", false),
            (r"_", r"\%", false),
            (r"%", r"\%", true),
            (r"\a", r"\%", false),
            (r"\\a", r"\%", false),
            (r"\%", r"\%", false),
            (r"\\%", r"\%", false),
            // Escape and non-wildcard
            (r"", r"\\a", false),
            (r"\", r"\\a", false),
            (r"\\", r"\\a", false),
            (r"a", r"\\a", false),
            (r"\a", r"\\a", true),
            (r"\\a", r"\\a", false),
            (r"\\\a", r"\\a", false),
            // Escape and _ wildcard
            (r"", r"\\_", false),
            (r"\", r"\\_", false),
            (r"\\", r"\\_", true),
            (r"a", r"\\_", false),
            (r"_", r"\\_", false),
            (r"%", r"\\_", false),
            (r"\a", r"\\_", true),
            (r"\\a", r"\\_", false),
            (r"\_", r"\\_", true),
            (r"\\_", r"\\_", false),
            (r"\\\_", r"\\_", false),
            // Escape and % wildcard
            (r"", r"\\%", false),
            (r"\", r"\\%", true),
            (r"\\", r"\\%", true),
            (r"a", r"\\%", false),
            (r"ab", r"\\%", false),
            (r"a%", r"\\%", false),
            (r"_", r"\\%", false),
            (r"%", r"\\%", false),
            (r"\a", r"\\%", true),
            (r"\\a", r"\\%", true),
            (r"\%", r"\\%", true),
            (r"\\%", r"\\%", true),
            (r"\\\%", r"\\%", true),
            // %... pattern with dangling escape
            (r"\", r"%\", true),
            (r"\\", r"%\", true),
            (r"%\", r"%\", true),
            (r"%\\", r"%\", true),
            (r"abc\", r"%\", true),
            (r"abc", r"%\", false),
            // %... pattern with escape
            (r"\", r"%\\", true),
            (r"\\", r"%\\", true),
            (r"%\\", r"%\\", true),
            (r"%\\\", r"%\\", true),
            (r"abc\", r"%\\", true),
            (r"abc", r"%\\", false),
            // %... pattern including escaped non-wildcard
            (r"ac", r"%a\c", true),
            (r"xyzac", r"%a\c", true),
            (r"abc", r"%a\c", false),
            (r"a\c", r"%a\c", false),
            (r"%a\c", r"%a\c", false),
            // %... pattern including escape
            (r"\", r"%a\\c", false),
            (r"\\", r"%a\\c", false),
            (r"ac", r"%a\\c", false),
            (r"a\c", r"%a\\c", true),
            (r"a\\c", r"%a\\c", false),
            (r"abc", r"%a\\c", false),
            (r"xyza\c", r"%a\\c", true),
            (r"xyza\\c", r"%a\\c", false),
            (r"%a\\c", r"%a\\c", false),
            // ...% pattern with escape
            (r"\", r"\\%", true),
            (r"\\", r"\\%", true),
            (r"\\%", r"\\%", true),
            (r"\\\%", r"\\%", true),
            (r"\abc", r"\\%", true),
            (r"a", r"\\%", false),
            (r"abc", r"\\%", false),
            // ...% pattern including escaped non-wildcard
            (r"ac", r"a\c%", true),
            (r"acxyz", r"a\c%", true),
            (r"abc", r"a\c%", false),
            (r"a\c", r"a\c%", false),
            (r"a\c%", r"a\c%", false),
            (r"a\\c%", r"a\c%", false),
            // ...% pattern including escape
            (r"ac", r"a\\c%", false),
            (r"a\c", r"a\\c%", true),
            (r"a\cxyz", r"a\\c%", true),
            (r"a\\c", r"a\\c%", false),
            (r"a\\cxyz", r"a\\c%", false),
            (r"abc", r"a\\c%", false),
            (r"abcxyz", r"a\\c%", false),
            (r"a\\c%", r"a\\c%", false),
            // %...% pattern including escaped non-wildcard
            (r"ac", r"%a\c%", true),
            (r"xyzacxyz", r"%a\c%", true),
            (r"abc", r"%a\c%", false),
            (r"a\c", r"%a\c%", false),
            (r"xyza\cxyz", r"%a\c%", false),
            (r"%a\c%", r"%a\c%", false),
            (r"%a\\c%", r"%a\c%", false),
            // %...% pattern including escape
            (r"ac", r"%a\\c%", false),
            (r"a\c", r"%a\\c%", true),
            (r"xyza\cxyz", r"%a\\c%", true),
            (r"a\\c", r"%a\\c%", false),
            (r"xyza\\cxyz", r"%a\\c%", false),
            (r"abc", r"%a\\c%", false),
            (r"xyzabcxyz", r"%a\\c%", false),
            (r"%a\\c%", r"%a\\c%", false),
            // Odd (7) backslashes and % wildcard
            (r"\\%", r"\\\\\\\%", false),
            (r"\\\", r"\\\\\\\%", false),
            (r"\\\%", r"\\\\\\\%", true),
            (r"\\\\", r"\\\\\\\%", false),
            (r"\\\\%", r"\\\\\\\%", false),
            (r"\\\\\\\%", r"\\\\\\\%", false),
            // Odd (7) backslashes and _ wildcard
            (r"\\\", r"\\\\\\\_", false),
            (r"\\\\", r"\\\\\\\_", false),
            (r"\\\_", r"\\\\\\\_", true),
            (r"\\\a", r"\\\\\\\_", false),
            (r"\\\\_", r"\\\\\\\_", false),
            (r"\\\\\\\_", r"\\\\\\\_", false),
            // Even (8) backslashes and % wildcard
            (r"\\\", r"\\\\\\\\%", false),
            (r"\\\\", r"\\\\\\\\%", true),
            (r"\\\\\", r"\\\\\\\\%", true),
            (r"\\\\xyz", r"\\\\\\\\%", true),
            (r"\\\\\\\\%", r"\\\\\\\\%", true),
            // Even (8) backslashes and _ wildcard
            (r"\\\", r"\\\\\\\\_", false),
            (r"\\\\", r"\\\\\\\\_", false),
            (r"\\\\\", r"\\\\\\\\_", true),
            (r"\\\\a", r"\\\\\\\\_", true),
            (r"\\\\\a", r"\\\\\\\\_", false),
            (r"\\\\ab", r"\\\\\\\\_", false),
            (r"\\\\\\\\_", r"\\\\\\\\_", false),
        ];

        for (value, pattern, expected) in test_cases {
            // Single-element results: the positive operators must produce
            // `matches`, the negated operators must produce `not_matches`.
            let matches = BooleanArray::from(vec![expected]);
            let not_matches = BooleanArray::from(vec![!expected]);

            for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
                // Pair up the datum variants `make_datums` yields for the value
                // and for the pattern (same string type on both sides).
                let values = make_datums(value, &string_type);
                let patterns = make_datums(pattern, &string_type);

                for ((value_datum, value_type), (pattern_datum, pattern_type)) in
                    zip(values, patterns)
                {
                    let value_datum = value_datum.as_ref();
                    let pattern_datum = pattern_datum.as_ref();

                    // Positive operators.
                    assert_eq!(
                        like(value_datum, pattern_datum).unwrap(),
                        matches,
                        "{value_type:?} «{value}» like {pattern_type:?} «{pattern}»"
                    );
                    assert_eq!(
                        ilike(value_datum, pattern_datum).unwrap(),
                        matches,
                        "{value_type:?} «{value}» ilike {pattern_type:?} «{pattern}»"
                    );

                    // Negated operators.
                    assert_eq!(
                        nlike(value_datum, pattern_datum).unwrap(),
                        not_matches,
                        "{value_type:?} «{value}» nlike {pattern_type:?} «{pattern}»"
                    );
                    assert_eq!(
                        nilike(value_datum, pattern_datum).unwrap(),
                        not_matches,
                        "{value_type:?} «{value}» nilike {pattern_type:?} «{pattern}»"
                    );
                }
            }
        }
    }