in spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/SparkFilters.java [111:227]
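  /**
   * Converts a Spark V1 {@link Filter} into an Iceberg {@link Expression}.
   *
   * <p>Returns null when the filter cannot be expressed as an Iceberg predicate.
   */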
  public static Expression convert(Filter filter) {
    // avoid using a chain of if instanceof statements by mapping to the expression enum.
    Operation op = FILTERS.get(filter.getClass());
    if (op != null) {
      switch (op) {
        case TRUE:
          return Expressions.alwaysTrue();

        case FALSE:
          return Expressions.alwaysFalse();

        case IS_NULL:
          IsNull isNullFilter = (IsNull) filter;
          return isNull(unquote(isNullFilter.attribute()));

        case NOT_NULL:
          IsNotNull notNullFilter = (IsNotNull) filter;
          return notNull(unquote(notNullFilter.attribute()));

        case LT:
          LessThan lt = (LessThan) filter;
          return lessThan(unquote(lt.attribute()), convertLiteral(lt.value()));

        case LT_EQ:
          LessThanOrEqual ltEq = (LessThanOrEqual) filter;
          return lessThanOrEqual(unquote(ltEq.attribute()), convertLiteral(ltEq.value()));

        case GT:
          GreaterThan gt = (GreaterThan) filter;
          return greaterThan(unquote(gt.attribute()), convertLiteral(gt.value()));

        case GT_EQ:
          GreaterThanOrEqual gtEq = (GreaterThanOrEqual) filter;
          return greaterThanOrEqual(unquote(gtEq.attribute()), convertLiteral(gtEq.value()));

        case EQ: // used for both eq and null-safe-eq
          if (filter instanceof EqualTo) {
            EqualTo eq = (EqualTo) filter;
            // comparison with null in normal equality is always null. this is probably a mistake.
            Preconditions.checkNotNull(
                eq.value(), "Expression is always false (eq is not null-safe): %s", filter);
            return handleEqual(unquote(eq.attribute()), eq.value());
          } else {
            EqualNullSafe eq = (EqualNullSafe) filter;
            if (eq.value() == null) {
              return isNull(unquote(eq.attribute()));
            } else {
              return handleEqual(unquote(eq.attribute()), eq.value());
            }
          }

        case IN:
          In inFilter = (In) filter;
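          // null literals can be dropped from the IN list: col IN (1, null) only selects rows
          // where col equals a non-null value, so nulls do not change which rows match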
          return in(
              unquote(inFilter.attribute()),
              Stream.of(inFilter.values())
                  .filter(Objects::nonNull)
                  .map(SparkFilters::convertLiteral)
                  .collect(Collectors.toList()));

        case NOT:
          Not notFilter = (Not) filter;
          Filter childFilter = notFilter.child();
          Operation childOp = FILTERS.get(childFilter.getClass());
          if (childOp == Operation.IN) {
            // infer an extra notNull predicate for Spark NOT IN filters
            // as Iceberg expressions don't follow the 3-value SQL boolean logic
            // col NOT IN (1, 2) in Spark is equivalent to notNull(col) && notIn(col, 1, 2) in
            // Iceberg
            In childInFilter = (In) childFilter;
            Expression notIn =
                notIn(
                    unquote(childInFilter.attribute()),
                    Stream.of(childInFilter.values())
                        .map(SparkFilters::convertLiteral)
                        .collect(Collectors.toList()));
            return and(notNull(childInFilter.attribute()), notIn);
          } else if (hasNoInFilter(childFilter)) {
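            // negate only children that contain no IN filter; an IN nested under NOT would need
            // the extra notNull handling above, so it is left unconverted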
            Expression child = convert(childFilter);
            if (child != null) {
              return not(child);
            }
          }
          return null;
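
        // AND and OR are converted only when both children convert; otherwise return null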
        case AND:
          {
            And andFilter = (And) filter;
            Expression left = convert(andFilter.left());
            Expression right = convert(andFilter.right());
            if (left != null && right != null) {
              return and(left, right);
            }
            return null;
          }

        case OR:
          {
            Or orFilter = (Or) filter;
            Expression left = convert(orFilter.left());
            Expression right = convert(orFilter.right());
            if (left != null && right != null) {
              return or(left, right);
            }
            return null;
          }

        case STARTS_WITH:
          {
            StringStartsWith stringStartsWith = (StringStartsWith) filter;
            return startsWith(unquote(stringStartsWith.attribute()), stringStartsWith.value());
          }
      }
    }
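
    // the filter class is not mapped to an Iceberg operation, so it cannot be converted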
    return null;
  }