in datafusion/core/src/physical_optimizer/pruning.rs [731:831]
/// Translates the predicate `expr` into an expression assembled by the
/// statistics helpers in this file, falling back to the literal `TRUE` for
/// anything that cannot be handled.
///
/// Handled shapes, in the order they are tried:
///
/// * `IS NULL` expressions, delegated to `build_is_null_column_expr`.
/// * A bare column, or `NOT <column>` (one level only, not recursive),
///   delegated to `build_single_column_expr`.
/// * `IN` / `NOT IN` lists with between 1 and 19 values, which are expanded
///   into a chain of `=` joined by `OR` (or `!=` joined by `AND` when
///   negated) and then processed recursively.
/// * `AND` / `OR`, whose operands are processed recursively and then
///   simplified when either side turned into `TRUE`.
/// * Any other binary expression, delegated to `PruningExpressionBuilder`
///   and `build_statistics_expr`.
///
/// `schema` and `required_columns` are forwarded to the helpers;
/// `required_columns` is passed mutably, so the helpers may record state in
/// it.
///
/// Every failure is absorbed locally by returning `TRUE` for just that
/// sub-expression, so one unsupported operand of an `AND` does not discard
/// the other operand.
fn build_predicate_expression(
    expr: &Arc<dyn PhysicalExpr>,
    schema: &Schema,
    required_columns: &mut RequiredStatColumns,
) -> Arc<dyn PhysicalExpr> {
    // IN lists of this length or longer are not expanded into a chain of
    // comparisons; they are treated as unhandled instead.
    const MAX_IN_LIST_REWRITE_LEN: usize = 20;

    // Returned for unsupported expressions. Such expressions are
    // converted to TRUE.
    let unhandled: Arc<dyn PhysicalExpr> =
        Arc::new(phys_expr::Literal::new(ScalarValue::Boolean(Some(true))));

    let expr_any = expr.as_any();

    if let Some(is_null) = expr_any.downcast_ref::<phys_expr::IsNullExpr>() {
        return build_is_null_column_expr(is_null.arg(), schema, required_columns)
            .unwrap_or(unhandled);
    }

    if let Some(col) = expr_any.downcast_ref::<phys_expr::Column>() {
        return build_single_column_expr(col, schema, required_columns, false)
            .unwrap_or(unhandled);
    }

    if let Some(not) = expr_any.downcast_ref::<phys_expr::NotExpr>() {
        // match !col (don't do so recursively)
        return match not.arg().as_any().downcast_ref::<phys_expr::Column>() {
            Some(col) => build_single_column_expr(col, schema, required_columns, true)
                .unwrap_or(unhandled),
            None => unhandled,
        };
    }

    if let Some(in_list) = expr_any.downcast_ref::<phys_expr::InListExpr>() {
        let list = in_list.list();
        if list.is_empty() || list.len() >= MAX_IN_LIST_REWRITE_LEN {
            return unhandled;
        }

        // `x IN (a, b)`     becomes `x = a OR x = b`
        // `x NOT IN (a, b)` becomes `x != a AND x != b`
        let (eq_op, re_op) = if in_list.negated() {
            (Operator::NotEq, Operator::And)
        } else {
            (Operator::Eq, Operator::Or)
        };

        let target = in_list.expr();
        let change_expr = list
            .iter()
            .map(|value| {
                Arc::new(phys_expr::BinaryExpr::new(
                    Arc::clone(target),
                    eq_op,
                    Arc::clone(value),
                )) as Arc<dyn PhysicalExpr>
            })
            .reduce(|acc, next| {
                Arc::new(phys_expr::BinaryExpr::new(acc, re_op, next))
                    as Arc<dyn PhysicalExpr>
            })
            .expect("IN list was checked to be non-empty above");

        return build_predicate_expression(&change_expr, schema, required_columns);
    }

    // Everything else must be a binary expression to be handled.
    let (left, op, right) = match expr_any.downcast_ref::<phys_expr::BinaryExpr>() {
        Some(bin_expr) => (
            Arc::clone(bin_expr.left()),
            *bin_expr.op(),
            Arc::clone(bin_expr.right()),
        ),
        None => return unhandled,
    };

    if matches!(op, Operator::And | Operator::Or) {
        let left_expr = build_predicate_expression(&left, schema, required_columns);
        let right_expr = build_predicate_expression(&right, schema, required_columns);

        // simplify boolean expression if applicable:
        //   TRUE AND x => x,  x AND TRUE => x,  TRUE OR x / x OR TRUE => TRUE
        return match op {
            Operator::And if is_always_true(&left_expr) => right_expr,
            Operator::And if is_always_true(&right_expr) => left_expr,
            Operator::Or
                if is_always_true(&left_expr) || is_always_true(&right_expr) =>
            {
                unhandled
            }
            _ => Arc::new(phys_expr::BinaryExpr::new(left_expr, op, right_expr)),
        };
    }

    // allow partial failure in predicate expression generation
    // this can still produce a useful predicate when multiple conditions are joined using AND
    let mut expr_builder = match PruningExpressionBuilder::try_new(
        &left,
        &right,
        op,
        schema,
        required_columns,
    ) {
        Ok(builder) => builder,
        Err(_) => return unhandled,
    };

    build_statistics_expr(&mut expr_builder).unwrap_or(unhandled)
}