fn mutate()

in datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs [356:1194]


    fn mutate(&mut self, expr: Expr) -> Result<Expr> {
        use datafusion_expr::Operator::{
            And, BitwiseAnd, BitwiseOr, BitwiseShiftLeft, BitwiseShiftRight, BitwiseXor,
            Divide, Eq, Modulo, Multiply, NotEq, Or, RegexIMatch, RegexMatch,
            RegexNotIMatch, RegexNotMatch,
        };

        let info = self.info;
        let new_expr = match expr {
            //
            // Rules for Eq
            //

            // true = A  --> A
            // false = A --> !A
            // null = A --> null
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Eq,
                right,
            }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
                match as_bool_lit(*left)? {
                    Some(true) => *right,
                    Some(false) => Expr::Not(right),
                    None => lit_bool_null(),
                }
            }
            // A = true  --> A
            // A = false --> !A
            // A = null --> null
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Eq,
                right,
            }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
                match as_bool_lit(*right)? {
                    Some(true) => *left,
                    Some(false) => Expr::Not(left),
                    None => lit_bool_null(),
                }
            }
            // expr IN () --> false
            // expr NOT IN () --> true
            Expr::InList(InList {
                expr,
                list,
                negated,
            }) if list.is_empty() && *expr != Expr::Literal(ScalarValue::Null) => {
                lit(negated)
            }

            // expr IN ((subquery)) -> expr IN (subquery), see ##5529
            Expr::InList(InList {
                expr,
                mut list,
                negated,
            }) if list.len() == 1
                && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
            {
                let Expr::ScalarSubquery(subquery) = list.remove(0) else { unreachable!() };
                Expr::InSubquery(InSubquery::new(expr, subquery, negated))
            }

            // if expr is a single column reference:
            // expr IN (A, B, ...) --> (expr = A) OR (expr = B) OR (expr = C)
            Expr::InList(InList {
                expr,
                list,
                negated,
            }) if !list.is_empty()
                && (
                    // For lists with only 1 value we allow more complex expressions to be simplified
                    // e.g SUBSTR(c1, 2, 3) IN ('1') -> SUBSTR(c1, 2, 3) = '1'
                    // for more than one we avoid repeating this potentially expensive
                    // expressions
                    list.len() == 1
                        || list.len() <= THRESHOLD_INLINE_INLIST
                            && expr.try_into_col().is_ok()
                ) =>
            {
                let first_val = list[0].clone();
                if negated {
                    list.into_iter().skip(1).fold(
                        (*expr.clone()).not_eq(first_val),
                        |acc, y| {
                            // Note that `A and B and C and D` is a left-deep tree structure
                            // as such we want to maintain this structure as much as possible
                            // to avoid reordering the expression during each optimization
                            // pass.
                            //
                            // Left-deep tree structure for `A and B and C and D`:
                            // ```
                            //        &
                            //       / \
                            //      &   D
                            //     / \
                            //    &   C
                            //   / \
                            //  A   B
                            // ```
                            //
                            // The code below maintain the left-deep tree structure.
                            acc.and((*expr.clone()).not_eq(y))
                        },
                    )
                } else {
                    list.into_iter().skip(1).fold(
                        (*expr.clone()).eq(first_val),
                        |acc, y| {
                            // Same reasoning as above
                            acc.or((*expr.clone()).eq(y))
                        },
                    )
                }
            }
            //
            // Rules for NotEq
            //

            // true != A  --> !A
            // false != A --> A
            // null != A --> null
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: NotEq,
                right,
            }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
                match as_bool_lit(*left)? {
                    Some(true) => Expr::Not(right),
                    Some(false) => *right,
                    None => lit_bool_null(),
                }
            }
            // A != true  --> !A
            // A != false --> A
            // A != null --> null,
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: NotEq,
                right,
            }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
                match as_bool_lit(*right)? {
                    Some(true) => Expr::Not(left),
                    Some(false) => *left,
                    None => lit_bool_null(),
                }
            }

            //
            // Rules for OR
            //

            // true OR A --> true (even if A is null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Or,
                right: _,
            }) if is_true(&left) => *left,
            // false OR A --> A
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Or,
                right,
            }) if is_false(&left) => *right,
            // A OR true --> true (even if A is null)
            Expr::BinaryExpr(BinaryExpr {
                left: _,
                op: Or,
                right,
            }) if is_true(&right) => *right,
            // A OR false --> A
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Or,
                right,
            }) if is_false(&right) => *left,
            // A OR !A ---> true (if A not nullable)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Or,
                right,
            }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
                Expr::Literal(ScalarValue::Boolean(Some(true)))
            }
            // !A OR A ---> true (if A not nullable)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Or,
                right,
            }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
                Expr::Literal(ScalarValue::Boolean(Some(true)))
            }
            // (..A..) OR A --> (..A..)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Or,
                right,
            }) if expr_contains(&left, &right, Or) => *left,
            // A OR (..A..) --> (..A..)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Or,
                right,
            }) if expr_contains(&right, &left, Or) => *right,
            // A OR (A AND B) --> A (if B not null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Or,
                right,
            }) if !info.nullable(&right)? && is_op_with(And, &right, &left) => *left,
            // (A AND B) OR A --> A (if B not null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Or,
                right,
            }) if !info.nullable(&left)? && is_op_with(And, &left, &right) => *right,

            //
            // Rules for AND
            //

            // true AND A --> A
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: And,
                right,
            }) if is_true(&left) => *right,
            // false AND A --> false (even if A is null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: And,
                right: _,
            }) if is_false(&left) => *left,
            // A AND true --> A
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: And,
                right,
            }) if is_true(&right) => *left,
            // A AND false --> false (even if A is null)
            Expr::BinaryExpr(BinaryExpr {
                left: _,
                op: And,
                right,
            }) if is_false(&right) => *right,
            // A AND !A ---> false (if A not nullable)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: And,
                right,
            }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
                Expr::Literal(ScalarValue::Boolean(Some(false)))
            }
            // !A AND A ---> false (if A not nullable)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: And,
                right,
            }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
                Expr::Literal(ScalarValue::Boolean(Some(false)))
            }
            // (..A..) AND A --> (..A..)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: And,
                right,
            }) if expr_contains(&left, &right, And) => *left,
            // A AND (..A..) --> (..A..)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: And,
                right,
            }) if expr_contains(&right, &left, And) => *right,
            // A AND (A OR B) --> A (if B not null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: And,
                right,
            }) if !info.nullable(&right)? && is_op_with(Or, &right, &left) => *left,
            // (A OR B) AND A --> A (if B not null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: And,
                right,
            }) if !info.nullable(&left)? && is_op_with(Or, &left, &right) => *right,

            //
            // Rules for Multiply
            //

            // A * 1 --> A
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Multiply,
                right,
            }) if is_one(&right) => *left,
            // 1 * A --> A
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Multiply,
                right,
            }) if is_one(&left) => *right,
            // A * null --> null
            Expr::BinaryExpr(BinaryExpr {
                left: _,
                op: Multiply,
                right,
            }) if is_null(&right) => *right,
            // null * A --> null
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Multiply,
                right: _,
            }) if is_null(&left) => *left,

            // A * 0 --> 0 (if A is not null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Multiply,
                right,
            }) if !info.nullable(&left)? && is_zero(&right) => *right,
            // 0 * A --> 0 (if A is not null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Multiply,
                right,
            }) if !info.nullable(&right)? && is_zero(&left) => *left,

            //
            // Rules for Divide
            //

            // A / 1 --> A
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Divide,
                right,
            }) if is_one(&right) => *left,
            // null / A --> null
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Divide,
                right: _,
            }) if is_null(&left) => *left,
            // A / null --> null
            Expr::BinaryExpr(BinaryExpr {
                left: _,
                op: Divide,
                right,
            }) if is_null(&right) => *right,
            // 0 / 0 -> null
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Divide,
                right,
            }) if is_zero(&left) && is_zero(&right) => {
                Expr::Literal(ScalarValue::Int32(None))
            }
            // A / 0 -> DivideByZero Error
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Divide,
                right,
            }) if !info.nullable(&left)? && is_zero(&right) => {
                return Err(DataFusionError::ArrowError(ArrowError::DivideByZero));
            }

            //
            // Rules for Modulo
            //

            // A % null --> null
            Expr::BinaryExpr(BinaryExpr {
                left: _,
                op: Modulo,
                right,
            }) if is_null(&right) => *right,
            // null % A --> null
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Modulo,
                right: _,
            }) if is_null(&left) => *left,
            // A % 1 --> 0
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Modulo,
                right,
            }) if !info.nullable(&left)? && is_one(&right) => lit(0),
            // A % 0 --> DivideByZero Error
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: Modulo,
                right,
            }) if !info.nullable(&left)? && is_zero(&right) => {
                return Err(DataFusionError::ArrowError(ArrowError::DivideByZero));
            }

            //
            // Rules for BitwiseAnd
            //

            // A & null -> null
            Expr::BinaryExpr(BinaryExpr {
                left: _,
                op: BitwiseAnd,
                right,
            }) if is_null(&right) => *right,

            // null & A -> null
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseAnd,
                right: _,
            }) if is_null(&left) => *left,

            // A & 0 -> 0 (if A not nullable)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseAnd,
                right,
            }) if !info.nullable(&left)? && is_zero(&right) => *right,

            // 0 & A -> 0 (if A not nullable)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseAnd,
                right,
            }) if !info.nullable(&right)? && is_zero(&left) => *left,

            // !A & A -> 0 (if A not nullable)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseAnd,
                right,
            }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
                Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left)?)?)
            }

            // A & !A -> 0 (if A not nullable)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseAnd,
                right,
            }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
                Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left)?)?)
            }

            // (..A..) & A --> (..A..)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseAnd,
                right,
            }) if expr_contains(&left, &right, BitwiseAnd) => *left,

            // A & (..A..) --> (..A..)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseAnd,
                right,
            }) if expr_contains(&right, &left, BitwiseAnd) => *right,

            // A & (A | B) --> A (if B not null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseAnd,
                right,
            }) if !info.nullable(&right)? && is_op_with(BitwiseOr, &right, &left) => {
                *left
            }

            // (A | B) & A --> A (if B not null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseAnd,
                right,
            }) if !info.nullable(&left)? && is_op_with(BitwiseOr, &left, &right) => {
                *right
            }

            //
            // Rules for BitwiseOr
            //

            // A | null -> null
            Expr::BinaryExpr(BinaryExpr {
                left: _,
                op: BitwiseOr,
                right,
            }) if is_null(&right) => *right,

            // null | A -> null
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseOr,
                right: _,
            }) if is_null(&left) => *left,

            // A | 0 -> A (even if A is null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseOr,
                right,
            }) if is_zero(&right) => *left,

            // 0 | A -> A (even if A is null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseOr,
                right,
            }) if is_zero(&left) => *right,

            // !A | A -> -1 (if A not nullable)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseOr,
                right,
            }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
                Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?)
            }

            // A | !A -> -1 (if A not nullable)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseOr,
                right,
            }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
                Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?)
            }

            // (..A..) | A --> (..A..)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseOr,
                right,
            }) if expr_contains(&left, &right, BitwiseOr) => *left,

            // A | (..A..) --> (..A..)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseOr,
                right,
            }) if expr_contains(&right, &left, BitwiseOr) => *right,

            // A | (A & B) --> A (if B not null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseOr,
                right,
            }) if !info.nullable(&right)? && is_op_with(BitwiseAnd, &right, &left) => {
                *left
            }

            // (A & B) | A --> A (if B not null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseOr,
                right,
            }) if !info.nullable(&left)? && is_op_with(BitwiseAnd, &left, &right) => {
                *right
            }

            //
            // Rules for BitwiseXor
            //

            // A ^ null -> null
            Expr::BinaryExpr(BinaryExpr {
                left: _,
                op: BitwiseXor,
                right,
            }) if is_null(&right) => *right,

            // null ^ A -> null
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseXor,
                right: _,
            }) if is_null(&left) => *left,

            // A ^ 0 -> A (if A not nullable)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseXor,
                right,
            }) if !info.nullable(&left)? && is_zero(&right) => *left,

            // 0 ^ A -> A (if A not nullable)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseXor,
                right,
            }) if !info.nullable(&right)? && is_zero(&left) => *right,

            // !A ^ A -> -1 (if A not nullable)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseXor,
                right,
            }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
                Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?)
            }

            // A ^ !A -> -1 (if A not nullable)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseXor,
                right,
            }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
                Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?)
            }

            // (..A..) ^ A --> (the expression without A, if number of A is odd, otherwise one A)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseXor,
                right,
            }) if expr_contains(&left, &right, BitwiseXor) => {
                let expr = delete_xor_in_complex_expr(&left, &right, false);
                if expr == *right {
                    Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&right)?)?)
                } else {
                    expr
                }
            }

            // A ^ (..A..) --> (the expression without A, if number of A is odd, otherwise one A)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseXor,
                right,
            }) if expr_contains(&right, &left, BitwiseXor) => {
                let expr = delete_xor_in_complex_expr(&right, &left, true);
                if expr == *left {
                    Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left)?)?)
                } else {
                    expr
                }
            }

            //
            // Rules for BitwiseShiftRight
            //

            // A >> null -> null
            Expr::BinaryExpr(BinaryExpr {
                left: _,
                op: BitwiseShiftRight,
                right,
            }) if is_null(&right) => *right,

            // null >> A -> null
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseShiftRight,
                right: _,
            }) if is_null(&left) => *left,

            // A >> 0 -> A (even if A is null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseShiftRight,
                right,
            }) if is_zero(&right) => *left,

            //
            // Rules for BitwiseShiftRight
            //

            // A << null -> null
            Expr::BinaryExpr(BinaryExpr {
                left: _,
                op: BitwiseShiftLeft,
                right,
            }) if is_null(&right) => *right,

            // null << A -> null
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseShiftLeft,
                right: _,
            }) if is_null(&left) => *left,

            // A << 0 -> A (even if A is null)
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: BitwiseShiftLeft,
                right,
            }) if is_zero(&right) => *left,

            //
            // Rules for Not
            //
            Expr::Not(inner) => negate_clause(*inner),

            //
            // Rules for Negative
            //
            Expr::Negative(inner) => distribute_negation(*inner),

            //
            // Rules for Case
            //

            // CASE
            //   WHEN X THEN A
            //   WHEN Y THEN B
            //   ...
            //   ELSE Q
            // END
            //
            // ---> (X AND A) OR (Y AND B AND NOT X) OR ... (NOT (X OR Y) AND Q)
            //
            // Note: the rationale for this rewrite is that the expr can then be further
            // simplified using the existing rules for AND/OR
            Expr::Case(Case {
                expr: None,
                when_then_expr,
                else_expr,
            }) if !when_then_expr.is_empty()
                && when_then_expr.len() < 3 // The rewrite is O(n!) so limit to small number
                && info.is_boolean_type(&when_then_expr[0].1)? =>
            {
                // The disjunction of all the when predicates encountered so far
                let mut filter_expr = lit(false);
                // The disjunction of all the cases
                let mut out_expr = lit(false);

                for (when, then) in when_then_expr {
                    let case_expr = when
                        .as_ref()
                        .clone()
                        .and(filter_expr.clone().not())
                        .and(*then);

                    out_expr = out_expr.or(case_expr);
                    filter_expr = filter_expr.or(*when);
                }

                if let Some(else_expr) = else_expr {
                    let case_expr = filter_expr.not().and(*else_expr);
                    out_expr = out_expr.or(case_expr);
                }

                // Do a first pass at simplification
                out_expr.rewrite(self)?
            }

            // log
            Expr::ScalarFunction(ScalarFunction {
                fun: BuiltinScalarFunction::Log,
                args,
            }) => simpl_log(args, <&S>::clone(&info))?,

            // power
            Expr::ScalarFunction(ScalarFunction {
                fun: BuiltinScalarFunction::Power,
                args,
            }) => simpl_power(args, <&S>::clone(&info))?,

            // concat
            Expr::ScalarFunction(ScalarFunction {
                fun: BuiltinScalarFunction::Concat,
                args,
            }) => simpl_concat(args)?,

            // concat_ws
            Expr::ScalarFunction(ScalarFunction {
                fun: BuiltinScalarFunction::ConcatWithSeparator,
                args,
            }) => match &args[..] {
                [delimiter, vals @ ..] => simpl_concat_ws(delimiter, vals)?,
                _ => Expr::ScalarFunction(ScalarFunction::new(
                    BuiltinScalarFunction::ConcatWithSeparator,
                    args,
                )),
            },

            //
            // Rules for Between
            //

            // a between 3 and 5  -->  a >= 3 AND a <=5
            // a not between 3 and 5  -->  a < 3 OR a > 5
            Expr::Between(between) => {
                if between.negated {
                    let l = *between.expr.clone();
                    let r = *between.expr;
                    or(l.lt(*between.low), r.gt(*between.high))
                } else {
                    and(
                        between.expr.clone().gt_eq(*between.low),
                        between.expr.lt_eq(*between.high),
                    )
                }
            }

            //
            // Rules for regexes
            //
            Expr::BinaryExpr(BinaryExpr {
                left,
                op: op @ (RegexMatch | RegexNotMatch | RegexIMatch | RegexNotIMatch),
                right,
            }) => simplify_regex_expr(left, op, right)?,

            // Rules for Like
            Expr::Like(Like {
                expr,
                pattern,
                negated,
                escape_char: _,
                case_insensitive: _,
            }) if !is_null(&expr)
                && matches!(
                    pattern.as_ref(),
                    Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) if pattern_str == "%"
                ) =>
            {
                lit(!negated)
            }

            // a is not null/unknown --> true (if a is not nullable)
            Expr::IsNotNull(expr) | Expr::IsNotUnknown(expr)
                if !info.nullable(&expr)? =>
            {
                lit(true)
            }

            // a is null/unknown --> false (if a is not nullable)
            Expr::IsNull(expr) | Expr::IsUnknown(expr) if !info.nullable(&expr)? => {
                lit(false)
            }

            // no additional rewrites possible
            expr => expr,
        };
        Ok(new_expr)
    }