in arrow-string/src/like.rs [2118:2356]
/// Exercises backslash escaping in `like`, `ilike`, `nlike` and `nilike`.
///
/// Every row of the table is `(value, pattern, expected)`, where `expected` is the
/// result that `like` and `ilike` must produce; `nlike` and `nilike` must produce its
/// negation. As the rows spell out, a backslash makes the following pattern character
/// literal (including `%`, `_` and `\` itself), and a trailing (dangling) backslash
/// matches a literal backslash.
///
/// Each row is checked for `Utf8`, `LargeUtf8` and `Utf8View`, pairing the value and
/// pattern datums yielded by `make_datums` position by position.
fn like_escape() {
    // (value, pattern, expected)
    // A fixed-size array is sufficient: the table is only ever iterated by value.
    let test_cases = [
        // Empty pattern
        (r"", r"", true),
        (r"\", r"", false),
        // Sole (dangling) escape (some engines consider this invalid pattern)
        (r"", r"\", false),
        (r"\", r"\", true),
        (r"\\", r"\", false),
        (r"a", r"\", false),
        (r"\a", r"\", false),
        (r"\\a", r"\", false),
        // Sole escaped backslash
        (r"", r"\\", false),
        (r"\", r"\\", true),
        (r"\\", r"\\", false),
        (r"a", r"\\", false),
        (r"\a", r"\\", false),
        (r"\\a", r"\\", false),
        // Escaped backslash followed by a dangling escape
        (r"", r"\\\", false),
        (r"\", r"\\\", false),
        (r"\\", r"\\\", true),
        (r"\\\", r"\\\", false),
        (r"\\\\", r"\\\", false),
        (r"a", r"\\\", false),
        (r"\a", r"\\\", false),
        (r"\\a", r"\\\", false),
        // Two escaped backslashes
        (r"", r"\\\\", false),
        (r"\", r"\\\\", false),
        (r"\\", r"\\\\", true),
        (r"\\\", r"\\\\", false),
        (r"\\\\", r"\\\\", false),
        (r"\\\\\", r"\\\\", false),
        (r"a", r"\\\\", false),
        (r"\a", r"\\\\", false),
        (r"\\a", r"\\\\", false),
        // Escaped non-wildcard
        (r"", r"\a", false),
        (r"\", r"\a", false),
        (r"\\", r"\a", false),
        (r"a", r"\a", true),
        (r"\a", r"\a", false),
        (r"\\a", r"\a", false),
        // Escaped _ wildcard
        (r"", r"\_", false),
        (r"\", r"\_", false),
        (r"\\", r"\_", false),
        (r"a", r"\_", false),
        (r"_", r"\_", true),
        (r"%", r"\_", false),
        (r"\a", r"\_", false),
        (r"\\a", r"\_", false),
        (r"\_", r"\_", false),
        (r"\\_", r"\_", false),
        // Escaped % wildcard
        (r"", r"\%", false),
        (r"\", r"\%", false),
        (r"\\", r"\%", false),
        (r"a", r"\%", false),
        (r"_", r"\%", false),
        (r"%", r"\%", true),
        (r"\a", r"\%", false),
        (r"\\a", r"\%", false),
        (r"\%", r"\%", false),
        (r"\\%", r"\%", false),
        // Escaped backslash followed by a non-wildcard
        (r"", r"\\a", false),
        (r"\", r"\\a", false),
        (r"\\", r"\\a", false),
        (r"a", r"\\a", false),
        (r"\a", r"\\a", true),
        (r"\\a", r"\\a", false),
        (r"\\\a", r"\\a", false),
        // Escaped backslash followed by the _ wildcard
        (r"", r"\\_", false),
        (r"\", r"\\_", false),
        (r"\\", r"\\_", true),
        (r"a", r"\\_", false),
        (r"_", r"\\_", false),
        (r"%", r"\\_", false),
        (r"\a", r"\\_", true),
        (r"\\a", r"\\_", false),
        (r"\_", r"\\_", true),
        (r"\\_", r"\\_", false),
        (r"\\\_", r"\\_", false),
        // Escaped backslash followed by the % wildcard
        (r"", r"\\%", false),
        (r"\", r"\\%", true),
        (r"\\", r"\\%", true),
        (r"a", r"\\%", false),
        (r"ab", r"\\%", false),
        (r"a%", r"\\%", false),
        (r"_", r"\\%", false),
        (r"%", r"\\%", false),
        (r"\a", r"\\%", true),
        (r"\\a", r"\\%", true),
        (r"\%", r"\\%", true),
        (r"\\%", r"\\%", true),
        (r"\\\%", r"\\%", true),
        // %... pattern with dangling escape
        (r"\", r"%\", true),
        (r"\\", r"%\", true),
        (r"%\", r"%\", true),
        (r"%\\", r"%\", true),
        (r"abc\", r"%\", true),
        (r"abc", r"%\", false),
        // %... pattern with escaped backslash
        (r"\", r"%\\", true),
        (r"\\", r"%\\", true),
        (r"%\\", r"%\\", true),
        (r"%\\\", r"%\\", true),
        (r"abc\", r"%\\", true),
        (r"abc", r"%\\", false),
        // %... pattern including escaped non-wildcard
        (r"ac", r"%a\c", true),
        (r"xyzac", r"%a\c", true),
        (r"abc", r"%a\c", false),
        (r"a\c", r"%a\c", false),
        (r"%a\c", r"%a\c", false),
        // %... pattern including escaped backslash
        (r"\", r"%a\\c", false),
        (r"\\", r"%a\\c", false),
        (r"ac", r"%a\\c", false),
        (r"a\c", r"%a\\c", true),
        (r"a\\c", r"%a\\c", false),
        (r"abc", r"%a\\c", false),
        (r"xyza\c", r"%a\\c", true),
        (r"xyza\\c", r"%a\\c", false),
        (r"%a\\c", r"%a\\c", false),
        // ...% pattern with escaped backslash
        (r"\", r"\\%", true),
        (r"\\", r"\\%", true),
        (r"\\%", r"\\%", true),
        (r"\\\%", r"\\%", true),
        (r"\abc", r"\\%", true),
        (r"a", r"\\%", false),
        (r"abc", r"\\%", false),
        // ...% pattern including escaped non-wildcard
        (r"ac", r"a\c%", true),
        (r"acxyz", r"a\c%", true),
        (r"abc", r"a\c%", false),
        (r"a\c", r"a\c%", false),
        (r"a\c%", r"a\c%", false),
        (r"a\\c%", r"a\c%", false),
        // ...% pattern including escaped backslash
        (r"ac", r"a\\c%", false),
        (r"a\c", r"a\\c%", true),
        (r"a\cxyz", r"a\\c%", true),
        (r"a\\c", r"a\\c%", false),
        (r"a\\cxyz", r"a\\c%", false),
        (r"abc", r"a\\c%", false),
        (r"abcxyz", r"a\\c%", false),
        (r"a\\c%", r"a\\c%", false),
        // %...% pattern including escaped non-wildcard
        (r"ac", r"%a\c%", true),
        (r"xyzacxyz", r"%a\c%", true),
        (r"abc", r"%a\c%", false),
        (r"a\c", r"%a\c%", false),
        (r"xyza\cxyz", r"%a\c%", false),
        (r"%a\c%", r"%a\c%", false),
        (r"%a\\c%", r"%a\c%", false),
        // %...% pattern including escaped backslash
        (r"ac", r"%a\\c%", false),
        (r"a\c", r"%a\\c%", true),
        (r"xyza\cxyz", r"%a\\c%", true),
        (r"a\\c", r"%a\\c%", false),
        (r"xyza\\cxyz", r"%a\\c%", false),
        (r"abc", r"%a\\c%", false),
        (r"xyzabcxyz", r"%a\\c%", false),
        (r"%a\\c%", r"%a\\c%", false),
        // Odd (7) backslashes and %: the last backslash escapes the %, making it literal
        (r"\\%", r"\\\\\\\%", false),
        (r"\\\", r"\\\\\\\%", false),
        (r"\\\%", r"\\\\\\\%", true),
        (r"\\\\", r"\\\\\\\%", false),
        (r"\\\\%", r"\\\\\\\%", false),
        (r"\\\\\\\%", r"\\\\\\\%", false),
        // Odd (7) backslashes and _: the last backslash escapes the _, making it literal
        (r"\\\", r"\\\\\\\_", false),
        (r"\\\\", r"\\\\\\\_", false),
        (r"\\\_", r"\\\\\\\_", true),
        (r"\\\a", r"\\\\\\\_", false),
        (r"\\\\_", r"\\\\\\\_", false),
        (r"\\\\\\\_", r"\\\\\\\_", false),
        // Even (8) backslashes and % wildcard: the % stays a wildcard
        (r"\\\", r"\\\\\\\\%", false),
        (r"\\\\", r"\\\\\\\\%", true),
        (r"\\\\\", r"\\\\\\\\%", true),
        (r"\\\\xyz", r"\\\\\\\\%", true),
        (r"\\\\\\\\%", r"\\\\\\\\%", true),
        // Even (8) backslashes and _ wildcard: the _ stays a wildcard
        (r"\\\", r"\\\\\\\\_", false),
        (r"\\\\", r"\\\\\\\\_", false),
        (r"\\\\\", r"\\\\\\\\_", true),
        (r"\\\\a", r"\\\\\\\\_", true),
        (r"\\\\\a", r"\\\\\\\\_", false),
        (r"\\\\ab", r"\\\\\\\\_", false),
        (r"\\\\\\\\_", r"\\\\\\\\_", false),
    ];
    for (value, pattern, expected) in test_cases {
        // The negated kernels must yield the exact opposite of `expected`.
        let unexpected = BooleanArray::from(vec![!expected]);
        let expected = BooleanArray::from(vec![expected]);
        for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
            // `zip` pairs the value and pattern datums position by position.
            for ((value_datum, value_type), (pattern_datum, pattern_type)) in zip(
                make_datums(value, &string_type),
                make_datums(pattern, &string_type),
            ) {
                let value_datum = value_datum.as_ref();
                let pattern_datum = pattern_datum.as_ref();
                assert_eq!(
                    like(value_datum, pattern_datum).unwrap(),
                    expected,
                    "{value_type:?} «{value}» like {pattern_type:?} «{pattern}»"
                );
                assert_eq!(
                    ilike(value_datum, pattern_datum).unwrap(),
                    expected,
                    "{value_type:?} «{value}» ilike {pattern_type:?} «{pattern}»"
                );
                assert_eq!(
                    nlike(value_datum, pattern_datum).unwrap(),
                    unexpected,
                    "{value_type:?} «{value}» nlike {pattern_type:?} «{pattern}»"
                );
                assert_eq!(
                    nilike(value_datum, pattern_datum).unwrap(),
                    unexpected,
                    "{value_type:?} «{value}» nilike {pattern_type:?} «{pattern}»"
                );
            }
        }
    }
}