in crates/iceberg/src/expr/visitors/row_group_metrics_evaluator.rs [393:460]
/// Evaluates a `NotStartsWith` predicate against the row group's min/max statistics.
///
/// Returns `ROW_GROUP_CANT_MATCH` only when both the lower and the upper bound of the
/// referenced column start with the prefix carried by `datum`. In every other case
/// (the column may contain nulls, a bound is missing, or either bound does not start
/// with the prefix) the row group is reported as `ROW_GROUP_MIGHT_MATCH`.
///
/// # Errors
///
/// Returns an `ErrorKind::Unexpected` error when `datum`, or a bound that has to be
/// inspected, is not a string literal. Errors from `min_value` / `max_value` are
/// propagated unchanged.
fn not_starts_with(
    &mut self,
    reference: &BoundReference,
    datum: &Datum,
    _predicate: &BoundPredicate,
) -> Result<bool> {
    let field_id = reference.field().id;

    // The bounds-based reasoning below is skipped entirely when nulls may be present.
    if self.may_contain_null(field_id) {
        return ROW_GROUP_MIGHT_MATCH;
    }

    // notStartsWith will match unless all values must start with the prefix.
    // This happens when the lower and upper bounds both start with the prefix.
    let PrimitiveLiteral::String(prefix) = datum.literal() else {
        return Err(Error::new(
            ErrorKind::Unexpected,
            "Cannot use NotStartsWith operator on non-string values",
        ));
    };

    let Some(lower_bound) = self.min_value(field_id)? else {
        return ROW_GROUP_MIGHT_MATCH;
    };
    let PrimitiveLiteral::String(lower_bound_str) = lower_bound.literal() else {
        return Err(Error::new(
            ErrorKind::Unexpected,
            "Cannot use NotStartsWith operator on non-string lower_bound value",
        ));
    };

    // A lower bound that sorts before the prefix, or is shorter than it, cannot start
    // with it; `starts_with` covers both situations without building a temporary String.
    if !lower_bound_str.starts_with(prefix.as_str()) {
        return ROW_GROUP_MIGHT_MATCH;
    }

    // The upper bound is only consulted once the lower bound is known to carry the prefix.
    let Some(upper_bound) = self.max_value(field_id)? else {
        return ROW_GROUP_MIGHT_MATCH;
    };
    let PrimitiveLiteral::String(upper_bound_str) = upper_bound.literal() else {
        return Err(Error::new(
            ErrorKind::Unexpected,
            "Cannot use NotStartsWith operator on non-string upper_bound value",
        ));
    };

    if upper_bound_str.starts_with(prefix.as_str()) {
        // both bounds match the prefix, so all rows must match the
        // prefix and therefore do not satisfy the predicate
        return ROW_GROUP_CANT_MATCH;
    }

    ROW_GROUP_MIGHT_MATCH
}