in components/places/src/match_impl.rs [141:191]
/// Returns the byte offset of the first position in `to_search` where a
/// case-insensitive occurrence of `search_for` might begin, or `None` when no
/// byte of `to_search` qualifies.
///
/// This is a cheap pre-filter, not an exact matcher: false positives are
/// acceptable, so ASCII lower-casing is approximated through
/// `dubious_to_ascii_lower` instead of a real case fold.
///
/// Two strategies are used, depending on `search_for`:
///
/// * ASCII: stop at the first byte that folds to the same value as
///   `search_for`. The only non-ASCII characters that lower-case to ASCII are
///   U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+212A KELVIN SIGN, so
///   when looking for `i` or `k` the scan also stops at the lead byte of
///   their UTF-8 encodings.
/// * Non-ASCII: ASCII characters always lower-case to ASCII, so every ASCII
///   byte is skipped and the scan stops at the first byte with the high bit
///   set.
///
/// Because of how UTF-8 uses the high-order bits, the returned offset never
/// points into the middle of a codepoint.
///
/// The assumptions about Unicode made here are verified in test_casing.
fn next_search_candidate(to_search: &str, search_for: char) -> Option<usize> {
    let haystack = to_search.as_bytes();
    let len = haystack.len();
    let mut pos = 0;

    // Note: the scans below are deliberately hand-written index loops; rustc
    // doesn't do great at all on the more idiomatic formulation, but it does
    // okay for this one.

    if !search_for.is_ascii() {
        // A non-ASCII needle cannot match at an ASCII byte, so the first
        // byte with the top bit set is the earliest possible candidate.
        while pos < len {
            if haystack[pos] >= 0x80 {
                return Some(pos);
            }
            pos += 1;
        }
        return None;
    }

    let folded = dubious_to_ascii_lower(search_for as u8);

    // Lead byte of the UTF-8 encoding of the special-case character that
    // lower-cases to the needle, if there is one: 0xc4 starts U+0130 (for
    // `i`) and 0xe2 starts U+212A (for `k`). Everything else gets 0xff, which
    // is not a valid UTF-8 byte and therefore never matches.
    let special_lead: u8 = match folded {
        b'i' => 0xc4,
        b'k' => 0xe2,
        _ => 0xff,
    };

    while pos < len {
        let byte = haystack[pos];
        if dubious_to_ascii_lower(byte) == folded || byte == special_lead {
            return Some(pos);
        }
        pos += 1;
    }
    None
}