fn build_predicate_expression()

in datafusion/core/src/physical_optimizer/pruning.rs [731:831]


/// Rewrites the predicate `expr` into a new physical expression, falling
/// back to the literal `true` for anything that cannot be rewritten.
///
/// The shapes recognised here, in the order they are tried:
///
/// * `IsNullExpr`: delegated to `build_is_null_column_expr`.
/// * a bare `Column`: delegated to `build_single_column_expr`.
/// * `NotExpr` directly wrapping a `Column`: delegated to
///   `build_single_column_expr` with its last argument set to `true`.
///   Any other negated expression is unsupported.
/// * `InListExpr` with between 1 and 19 list items: expanded into a chain
///   of `expr = item` joined by `OR` (or `expr != item` joined by `AND`
///   when the list is negated) and then rewritten recursively.
/// * `BinaryExpr` with `AND` / `OR`: both sides are rewritten recursively
///   and the result is simplified when a side is always true.
/// * any other `BinaryExpr`: handed to `PruningExpressionBuilder` and
///   `build_statistics_expr`.
///
/// `required_columns` is passed mutably to every helper and to the
/// recursive calls.
fn build_predicate_expression(
    expr: &Arc<dyn PhysicalExpr>,
    schema: &Schema,
    required_columns: &mut RequiredStatColumns,
) -> Arc<dyn PhysicalExpr> {
    // Fallback result: unsupported expressions are converted to TRUE.
    let always_true: Arc<dyn PhysicalExpr> =
        Arc::new(phys_expr::Literal::new(ScalarValue::Boolean(Some(true))));

    let any = expr.as_any();

    // `<arg> IS NULL`
    if let Some(is_null) = any.downcast_ref::<phys_expr::IsNullExpr>() {
        let rewritten =
            build_is_null_column_expr(is_null.arg(), schema, required_columns);
        return rewritten.unwrap_or(always_true);
    }

    // A bare column used as the predicate.
    if let Some(column) = any.downcast_ref::<phys_expr::Column>() {
        let rewritten =
            build_single_column_expr(column, schema, required_columns, false);
        return rewritten.unwrap_or(always_true);
    }

    // `NOT <column>`; the argument is deliberately not inspected recursively.
    if let Some(negation) = any.downcast_ref::<phys_expr::NotExpr>() {
        return match negation.arg().as_any().downcast_ref::<phys_expr::Column>() {
            Some(column) => {
                build_single_column_expr(column, schema, required_columns, true)
                    .unwrap_or(always_true)
            }
            None => always_true,
        };
    }

    // `<expr> [NOT] IN (<items>)`
    if let Some(in_list) = any.downcast_ref::<phys_expr::InListExpr>() {
        let items = in_list.list();
        // Only non-empty lists with fewer than 20 items are expanded.
        if items.is_empty() || items.len() >= 20 {
            return always_true;
        }

        // IN     => (e = a)  OR  (e = b)  OR  ...
        // NOT IN => (e != a) AND (e != b) AND ...
        let (compare_op, combine_op) = if in_list.negated() {
            (Operator::NotEq, Operator::And)
        } else {
            (Operator::Eq, Operator::Or)
        };

        let expanded = items
            .iter()
            .map(|item| -> Arc<dyn PhysicalExpr> {
                Arc::new(phys_expr::BinaryExpr::new(
                    in_list.expr().clone(),
                    compare_op,
                    item.clone(),
                ))
            })
            .reduce(|acc, next| -> Arc<dyn PhysicalExpr> {
                Arc::new(phys_expr::BinaryExpr::new(acc, combine_op, next))
            })
            // Cannot be `None`: the list was checked to be non-empty above.
            .unwrap();

        return build_predicate_expression(&expanded, schema, required_columns);
    }

    // Everything that remains must be a binary expression.
    let binary = match any.downcast_ref::<phys_expr::BinaryExpr>() {
        Some(binary) => binary,
        None => return always_true,
    };
    let lhs = binary.left().clone();
    let op = *binary.op();
    let rhs = binary.right().clone();

    if matches!(op, Operator::And | Operator::Or) {
        // Rewrite the left side first, then the right side.
        let lhs_pred = build_predicate_expression(&lhs, schema, required_columns);
        let rhs_pred = build_predicate_expression(&rhs, schema, required_columns);

        // Simplify the boolean expression where a side is always true.
        return if op == Operator::And {
            if is_always_true(&lhs_pred) {
                // TRUE AND x => x
                rhs_pred
            } else if is_always_true(&rhs_pred) {
                // x AND TRUE => x
                lhs_pred
            } else {
                Arc::new(phys_expr::BinaryExpr::new(lhs_pred, op, rhs_pred))
            }
        } else if is_always_true(&lhs_pred) || is_always_true(&rhs_pred) {
            // TRUE OR x => TRUE, x OR TRUE => TRUE
            always_true
        } else {
            Arc::new(phys_expr::BinaryExpr::new(lhs_pred, op, rhs_pred))
        };
    }

    // A failure here is tolerated on purpose: returning TRUE for this part
    // can still leave a useful predicate when several conditions are
    // joined using AND.
    match PruningExpressionBuilder::try_new(&lhs, &rhs, op, schema, required_columns)
    {
        Ok(mut builder) => build_statistics_expr(&mut builder).unwrap_or(always_true),
        Err(_) => always_true,
    }
}