in pyiceberg/expressions/visitors.py [0:0]
def visit_in(self, term: BoundTerm[L], literals: Set[L]) -> bool:
    """Evaluate an IN predicate against the recorded column bounds.

    The candidate literals are narrowed to those lying inside the
    [lower bound, upper bound] range stored for the term's field; if
    none remain, no row can match.

    Args:
        term: Bound term whose referenced field is being tested.
        literals: Values the field is allowed to take.

    Returns:
        ROWS_CANNOT_MATCH when ``_contains_nulls_only`` or
        ``_contains_nans_only`` holds for the field, or when every
        literal falls outside the available bounds; ROWS_MIGHT_MATCH
        otherwise.

    Raises:
        ValueError: If the field's type is not a ``PrimitiveType``.
    """
    field = term.ref().field
    fid = field.field_id

    if self._contains_nulls_only(fid) or self._contains_nans_only(fid):
        return ROWS_CANNOT_MATCH

    # Too many values to be worth checking one by one: stay conservative.
    if len(literals) > IN_PREDICATE_LIMIT:
        return ROWS_MIGHT_MATCH

    field_type = field.field_type
    if not isinstance(field_type, PrimitiveType):
        raise ValueError(f"Expected PrimitiveType: {field.field_type}")

    # A falsy entry (absent or empty) means there is no lower bound to prune on.
    raw_lower = self.lower_bounds.get(fid)
    if raw_lower:
        lower = from_bytes(field_type, raw_lower)
        if self._is_nan(lower):
            # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
            return ROWS_MIGHT_MATCH

        # Keep only the candidates at or above the lower bound.
        literals = {candidate for candidate in literals if lower <= candidate}  # type: ignore[operator]
        if not literals:
            return ROWS_CANNOT_MATCH

    # Same treatment for the upper bound, applied to whatever survived above.
    raw_upper = self.upper_bounds.get(fid)
    if raw_upper:
        upper = from_bytes(field_type, raw_upper)
        # this is different from Java, here NaN is always larger
        if self._is_nan(upper):
            return ROWS_MIGHT_MATCH

        # Keep only the candidates at or below the upper bound.
        literals = {candidate for candidate in literals if upper >= candidate}  # type: ignore[operator]
        if not literals:
            return ROWS_CANNOT_MATCH

    return ROWS_MIGHT_MATCH