fn build_predicate_expression()

in datafusion/physical-optimizer/src/pruning.rs [1436:1567]


fn build_predicate_expression(
    expr: &Arc<dyn PhysicalExpr>,
    schema: &Schema,
    required_columns: &mut RequiredColumns,
    unhandled_hook: &Arc<dyn UnhandledPredicateHook>,
) -> Arc<dyn PhysicalExpr> {
    // predicate expression can only be a binary expression
    let expr_any = expr.as_any();
    if let Some(is_null) = expr_any.downcast_ref::<phys_expr::IsNullExpr>() {
        return build_is_null_column_expr(is_null.arg(), schema, required_columns, false)
            .unwrap_or_else(|| unhandled_hook.handle(expr));
    }
    if let Some(is_not_null) = expr_any.downcast_ref::<phys_expr::IsNotNullExpr>() {
        return build_is_null_column_expr(
            is_not_null.arg(),
            schema,
            required_columns,
            true,
        )
        .unwrap_or_else(|| unhandled_hook.handle(expr));
    }
    if let Some(col) = expr_any.downcast_ref::<phys_expr::Column>() {
        return build_single_column_expr(col, schema, required_columns, false)
            .unwrap_or_else(|| unhandled_hook.handle(expr));
    }
    if let Some(not) = expr_any.downcast_ref::<phys_expr::NotExpr>() {
        // match !col (don't do so recursively)
        if let Some(col) = not.arg().as_any().downcast_ref::<phys_expr::Column>() {
            return build_single_column_expr(col, schema, required_columns, true)
                .unwrap_or_else(|| unhandled_hook.handle(expr));
        } else {
            return unhandled_hook.handle(expr);
        }
    }
    if let Some(in_list) = expr_any.downcast_ref::<phys_expr::InListExpr>() {
        if !in_list.list().is_empty()
            && in_list.list().len() <= MAX_LIST_VALUE_SIZE_REWRITE
        {
            let eq_op = if in_list.negated() {
                Operator::NotEq
            } else {
                Operator::Eq
            };
            let re_op = if in_list.negated() {
                Operator::And
            } else {
                Operator::Or
            };
            let change_expr = in_list
                .list()
                .iter()
                .map(|e| {
                    Arc::new(phys_expr::BinaryExpr::new(
                        Arc::clone(in_list.expr()),
                        eq_op,
                        Arc::clone(e),
                    )) as _
                })
                .reduce(|a, b| Arc::new(phys_expr::BinaryExpr::new(a, re_op, b)) as _)
                .unwrap();
            return build_predicate_expression(
                &change_expr,
                schema,
                required_columns,
                unhandled_hook,
            );
        } else {
            return unhandled_hook.handle(expr);
        }
    }

    let (left, op, right) = {
        if let Some(bin_expr) = expr_any.downcast_ref::<phys_expr::BinaryExpr>() {
            (
                Arc::clone(bin_expr.left()),
                *bin_expr.op(),
                Arc::clone(bin_expr.right()),
            )
        } else if let Some(like_expr) = expr_any.downcast_ref::<phys_expr::LikeExpr>() {
            if like_expr.case_insensitive() {
                return unhandled_hook.handle(expr);
            }
            let op = match (like_expr.negated(), like_expr.case_insensitive()) {
                (false, false) => Operator::LikeMatch,
                (true, false) => Operator::NotLikeMatch,
                (false, true) => Operator::ILikeMatch,
                (true, true) => Operator::NotILikeMatch,
            };
            (
                Arc::clone(like_expr.expr()),
                op,
                Arc::clone(like_expr.pattern()),
            )
        } else {
            return unhandled_hook.handle(expr);
        }
    };

    if op == Operator::And || op == Operator::Or {
        let left_expr =
            build_predicate_expression(&left, schema, required_columns, unhandled_hook);
        let right_expr =
            build_predicate_expression(&right, schema, required_columns, unhandled_hook);
        // simplify boolean expression if applicable
        let expr = match (&left_expr, op, &right_expr) {
            (left, Operator::And, _) if is_always_true(left) => right_expr,
            (_, Operator::And, right) if is_always_true(right) => left_expr,
            (left, Operator::Or, right)
                if is_always_true(left) || is_always_true(right) =>
            {
                Arc::new(phys_expr::Literal::new(ScalarValue::Boolean(Some(true))))
            }
            _ => Arc::new(phys_expr::BinaryExpr::new(left_expr, op, right_expr)),
        };
        return expr;
    }

    let expr_builder =
        PruningExpressionBuilder::try_new(&left, &right, op, schema, required_columns);
    let mut expr_builder = match expr_builder {
        Ok(builder) => builder,
        // allow partial failure in predicate expression generation
        // this can still produce a useful predicate when multiple conditions are joined using AND
        Err(e) => {
            debug!("Error building pruning expression: {e}");
            return unhandled_hook.handle(expr);
        }
    };

    build_statistics_expr(&mut expr_builder)
        .unwrap_or_else(|_| unhandled_hook.handle(expr))
}