in datafusion/physical-optimizer/src/pruning.rs [1436:1567]
/// Rewrites the predicate `expr` using the helper builders of this module
/// (`build_is_null_column_expr`, `build_single_column_expr`,
/// `PruningExpressionBuilder` / `build_statistics_expr`). `schema` and
/// `required_columns` are passed through to those helpers.
///
/// Expression shapes are tried in this order:
/// - `IS NULL` and `IS NOT NULL`
/// - a bare column, or `NOT` applied directly to a column
/// - `IN` / `NOT IN` lists that are non-empty and hold at most
///   `MAX_LIST_VALUE_SIZE_REWRITE` values; these are expanded into a chain of
///   `=` joined by `OR` (or `!=` joined by `AND` when negated) and the result
///   is rewritten recursively
/// - binary expressions and case-sensitive `LIKE` / `NOT LIKE`; for `AND` /
///   `OR` both operands are rewritten recursively
///
/// Whenever an expression (or, through the recursion, a sub-expression)
/// cannot be rewritten, the value returned by `unhandled_hook.handle` for
/// that expression is used in its place.
fn build_predicate_expression(
    expr: &Arc<dyn PhysicalExpr>,
    schema: &Schema,
    required_columns: &mut RequiredColumns,
    unhandled_hook: &Arc<dyn UnhandledPredicateHook>,
) -> Arc<dyn PhysicalExpr> {
    let expr_any = expr.as_any();

    // `<arg> IS NULL`
    if let Some(is_null) = expr_any.downcast_ref::<phys_expr::IsNullExpr>() {
        return build_is_null_column_expr(is_null.arg(), schema, required_columns, false)
            .unwrap_or_else(|| unhandled_hook.handle(expr));
    }

    // `<arg> IS NOT NULL`
    if let Some(is_not_null) = expr_any.downcast_ref::<phys_expr::IsNotNullExpr>() {
        return build_is_null_column_expr(
            is_not_null.arg(),
            schema,
            required_columns,
            true,
        )
        .unwrap_or_else(|| unhandled_hook.handle(expr));
    }

    // A bare column used as the predicate.
    if let Some(col) = expr_any.downcast_ref::<phys_expr::Column>() {
        return build_single_column_expr(col, schema, required_columns, false)
            .unwrap_or_else(|| unhandled_hook.handle(expr));
    }

    if let Some(not) = expr_any.downcast_ref::<phys_expr::NotExpr>() {
        // Only a direct `NOT <column>` is matched; the argument is not
        // descended into recursively.
        return match not.arg().as_any().downcast_ref::<phys_expr::Column>() {
            Some(col) => build_single_column_expr(col, schema, required_columns, true)
                .unwrap_or_else(|| unhandled_hook.handle(expr)),
            None => unhandled_hook.handle(expr),
        };
    }

    if let Some(in_list) = expr_any.downcast_ref::<phys_expr::InListExpr>() {
        let values = in_list.list();
        // Empty lists and lists above the size limit are not expanded.
        if values.is_empty() || values.len() > MAX_LIST_VALUE_SIZE_REWRITE {
            return unhandled_hook.handle(expr);
        }

        // `x IN (a, b)`     becomes `x = a OR x = b`
        // `x NOT IN (a, b)` becomes `x != a AND x != b`
        let (eq_op, re_op) = if in_list.negated() {
            (Operator::NotEq, Operator::And)
        } else {
            (Operator::Eq, Operator::Or)
        };
        let change_expr = values
            .iter()
            .map(|e| {
                Arc::new(phys_expr::BinaryExpr::new(
                    Arc::clone(in_list.expr()),
                    eq_op,
                    Arc::clone(e),
                )) as _
            })
            .reduce(|a, b| Arc::new(phys_expr::BinaryExpr::new(a, re_op, b)) as _)
            .expect("IN list was checked to be non-empty above");
        return build_predicate_expression(
            &change_expr,
            schema,
            required_columns,
            unhandled_hook,
        );
    }

    // Everything that remains must decompose into `left <op> right`.
    let (left, op, right) = {
        if let Some(bin_expr) = expr_any.downcast_ref::<phys_expr::BinaryExpr>() {
            (
                Arc::clone(bin_expr.left()),
                *bin_expr.op(),
                Arc::clone(bin_expr.right()),
            )
        } else if let Some(like_expr) = expr_any.downcast_ref::<phys_expr::LikeExpr>() {
            // Case-insensitive LIKE is always handed to the hook, so only the
            // case-sensitive operators can be selected below.
            if like_expr.case_insensitive() {
                return unhandled_hook.handle(expr);
            }
            let op = if like_expr.negated() {
                Operator::NotLikeMatch
            } else {
                Operator::LikeMatch
            };
            (
                Arc::clone(like_expr.expr()),
                op,
                Arc::clone(like_expr.pattern()),
            )
        } else {
            return unhandled_hook.handle(expr);
        }
    };

    if op == Operator::And || op == Operator::Or {
        let left_expr =
            build_predicate_expression(&left, schema, required_columns, unhandled_hook);
        let right_expr =
            build_predicate_expression(&right, schema, required_columns, unhandled_hook);
        // Simplify the boolean expression if applicable: an AND keeps only the
        // other operand when one side passes `is_always_true`, and an OR with
        // such a side collapses to the literal `true`.
        return match op {
            Operator::And if is_always_true(&left_expr) => right_expr,
            Operator::And if is_always_true(&right_expr) => left_expr,
            Operator::Or
                if is_always_true(&left_expr) || is_always_true(&right_expr) =>
            {
                Arc::new(phys_expr::Literal::new(ScalarValue::Boolean(Some(true))))
            }
            _ => Arc::new(phys_expr::BinaryExpr::new(left_expr, op, right_expr)),
        };
    }

    let expr_builder =
        PruningExpressionBuilder::try_new(&left, &right, op, schema, required_columns);
    let mut expr_builder = match expr_builder {
        Ok(builder) => builder,
        // allow partial failure in predicate expression generation
        // this can still produce a useful predicate when multiple conditions are joined using AND
        Err(e) => {
            debug!("Error building pruning expression: {e}");
            return unhandled_hook.handle(expr);
        }
    };
    build_statistics_expr(&mut expr_builder)
        .unwrap_or_else(|_| unhandled_hook.handle(expr))
}