in starlark/src/values/types/string/fast_string.rs [42:96]
/// Skip forward over the first `n` bytes of `x` for as long as they are
/// single-byte UTF-8 characters, returning the byte offset reached.
///
/// The result is always `<= n`. It equals `n` when every check passed. It can
/// be smaller than the index of the first multi-byte character: in the
/// word-at-a-time phase a failing 8-byte word is not examined byte by byte,
/// the scan simply stops at the start of that word and leaves the rest to the
/// caller (which is expected to fall back to the standard Rust code).
///
/// The caller must guarantee `n <= x.len()`; this is only checked in debug builds.
fn skip_at_most_1byte(x: &str, n: usize) -> usize {
    if n == 0 {
        return 0;
    }
    debug_assert!(x.len() >= n);

    // Strategy: a multi-byte UTF-8 character has 0x80 set in each of its bytes.
    // Check single bytes until the pointer is 8-byte aligned, then whole 8-byte
    // words, then whatever bytes are left over. A population-count based scan
    // could be faster still, but heavily non-ASCII strings are not expected here.
    // (c.f. https://github.com/haskell-foundation/foundation/blob/master/foundation/cbits/foundation_utf8.c)

    /// Same scan, but yielding a pointer to where it stopped rather than an offset.
    fn scan_end(x: &str, n: usize) -> *const u8 {
        let start = x.as_ptr();
        // `align_offset` may report `usize::MAX` when alignment is impossible;
        // capping at `n` then makes the whole scan byte-wise.
        let head = min(start.align_offset(8), n);
        let rest = n - head;
        let words = rest / 8;
        let tail = rest % 8;

        // Work on raw pointers throughout instead of hopping between `&str` and pointers.

        // Phase 1: single bytes up to the alignment boundary.
        let mut byte_ptr = start;
        let mut left = head;
        while left > 0 {
            // SAFETY: fewer than `head <= n` bytes have been consumed, so this
            // read is inside the first `n` bytes of `x` (given `n <= x.len()`).
            if !is_1byte(unsafe { *byte_ptr }) {
                return byte_ptr;
            }
            // SAFETY: advances at most to `start + head`, still within `x`.
            byte_ptr = unsafe { byte_ptr.add(1) };
            left -= 1;
        }

        // Phase 2: whole 8-byte words.
        let mut word_ptr = byte_ptr.cast::<u64>();
        let mut left = words;
        while left > 0 {
            // SAFETY: `words > 0` implies `head` was the full alignment offset,
            // so the pointer is 8-byte aligned, and `head + words * 8 <= n`
            // keeps every word inside the first `n` bytes of `x`.
            if !is_1bytes(unsafe { *word_ptr }) {
                return word_ptr.cast::<u8>();
            }
            // SAFETY: advances at most to `start + head + words * 8`, within `x`.
            word_ptr = unsafe { word_ptr.add(1) };
            left -= 1;
        }

        // Phase 3: the bytes that did not fill a whole word.
        let mut byte_ptr = word_ptr.cast::<u8>();
        let mut left = tail;
        while left > 0 {
            // SAFETY: `head + words * 8 + tail == n`, so this read is inside
            // the first `n` bytes of `x`.
            if !is_1byte(unsafe { *byte_ptr }) {
                return byte_ptr;
            }
            // SAFETY: advances at most to `start + n`, i.e. at most one past the end of `x`.
            byte_ptr = unsafe { byte_ptr.add(1) };
            left -= 1;
        }
        byte_ptr
    }

    let end = scan_end(x, n);
    // SAFETY: `end` was derived from `x.as_ptr()` by forward steps only and
    // lies within (or one past the end of) the same string, so the distance is
    // well defined and non-negative.
    unsafe { end.offset_from(x.as_ptr()) as usize }
}