in datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs [356:1194]
fn mutate(&mut self, expr: Expr) -> Result<Expr> {
use datafusion_expr::Operator::{
And, BitwiseAnd, BitwiseOr, BitwiseShiftLeft, BitwiseShiftRight, BitwiseXor,
Divide, Eq, Modulo, Multiply, NotEq, Or, RegexIMatch, RegexMatch,
RegexNotIMatch, RegexNotMatch,
};
let info = self.info;
let new_expr = match expr {
//
// Rules for Eq
//
// true = A --> A
// false = A --> !A
// null = A --> null
Expr::BinaryExpr(BinaryExpr {
left,
op: Eq,
right,
}) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
match as_bool_lit(*left)? {
Some(true) => *right,
Some(false) => Expr::Not(right),
None => lit_bool_null(),
}
}
// A = true --> A
// A = false --> !A
// A = null --> null
Expr::BinaryExpr(BinaryExpr {
left,
op: Eq,
right,
}) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
match as_bool_lit(*right)? {
Some(true) => *left,
Some(false) => Expr::Not(left),
None => lit_bool_null(),
}
}
// expr IN () --> false
// expr NOT IN () --> true
Expr::InList(InList {
expr,
list,
negated,
}) if list.is_empty() && *expr != Expr::Literal(ScalarValue::Null) => {
lit(negated)
}
// expr IN ((subquery)) -> expr IN (subquery), see ##5529
Expr::InList(InList {
expr,
mut list,
negated,
}) if list.len() == 1
&& matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
{
let Expr::ScalarSubquery(subquery) = list.remove(0) else { unreachable!() };
Expr::InSubquery(InSubquery::new(expr, subquery, negated))
}
// if expr is a single column reference:
// expr IN (A, B, ...) --> (expr = A) OR (expr = B) OR (expr = C)
Expr::InList(InList {
expr,
list,
negated,
}) if !list.is_empty()
&& (
// For lists with only 1 value we allow more complex expressions to be simplified
// e.g SUBSTR(c1, 2, 3) IN ('1') -> SUBSTR(c1, 2, 3) = '1'
// for more than one we avoid repeating this potentially expensive
// expressions
list.len() == 1
|| list.len() <= THRESHOLD_INLINE_INLIST
&& expr.try_into_col().is_ok()
) =>
{
let first_val = list[0].clone();
if negated {
list.into_iter().skip(1).fold(
(*expr.clone()).not_eq(first_val),
|acc, y| {
// Note that `A and B and C and D` is a left-deep tree structure
// as such we want to maintain this structure as much as possible
// to avoid reordering the expression during each optimization
// pass.
//
// Left-deep tree structure for `A and B and C and D`:
// ```
// &
// / \
// & D
// / \
// & C
// / \
// A B
// ```
//
// The code below maintain the left-deep tree structure.
acc.and((*expr.clone()).not_eq(y))
},
)
} else {
list.into_iter().skip(1).fold(
(*expr.clone()).eq(first_val),
|acc, y| {
// Same reasoning as above
acc.or((*expr.clone()).eq(y))
},
)
}
}
//
// Rules for NotEq
//
// true != A --> !A
// false != A --> A
// null != A --> null
Expr::BinaryExpr(BinaryExpr {
left,
op: NotEq,
right,
}) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
match as_bool_lit(*left)? {
Some(true) => Expr::Not(right),
Some(false) => *right,
None => lit_bool_null(),
}
}
// A != true --> !A
// A != false --> A
// A != null --> null,
Expr::BinaryExpr(BinaryExpr {
left,
op: NotEq,
right,
}) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
match as_bool_lit(*right)? {
Some(true) => Expr::Not(left),
Some(false) => *left,
None => lit_bool_null(),
}
}
//
// Rules for OR
//
// true OR A --> true (even if A is null)
Expr::BinaryExpr(BinaryExpr {
left,
op: Or,
right: _,
}) if is_true(&left) => *left,
// false OR A --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: Or,
right,
}) if is_false(&left) => *right,
// A OR true --> true (even if A is null)
Expr::BinaryExpr(BinaryExpr {
left: _,
op: Or,
right,
}) if is_true(&right) => *right,
// A OR false --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: Or,
right,
}) if is_false(&right) => *left,
// A OR !A ---> true (if A not nullable)
Expr::BinaryExpr(BinaryExpr {
left,
op: Or,
right,
}) if is_not_of(&right, &left) && !info.nullable(&left)? => {
Expr::Literal(ScalarValue::Boolean(Some(true)))
}
// !A OR A ---> true (if A not nullable)
Expr::BinaryExpr(BinaryExpr {
left,
op: Or,
right,
}) if is_not_of(&left, &right) && !info.nullable(&right)? => {
Expr::Literal(ScalarValue::Boolean(Some(true)))
}
// (..A..) OR A --> (..A..)
Expr::BinaryExpr(BinaryExpr {
left,
op: Or,
right,
}) if expr_contains(&left, &right, Or) => *left,
// A OR (..A..) --> (..A..)
Expr::BinaryExpr(BinaryExpr {
left,
op: Or,
right,
}) if expr_contains(&right, &left, Or) => *right,
// A OR (A AND B) --> A (if B not null)
Expr::BinaryExpr(BinaryExpr {
left,
op: Or,
right,
}) if !info.nullable(&right)? && is_op_with(And, &right, &left) => *left,
// (A AND B) OR A --> A (if B not null)
Expr::BinaryExpr(BinaryExpr {
left,
op: Or,
right,
}) if !info.nullable(&left)? && is_op_with(And, &left, &right) => *right,
//
// Rules for AND
//
// true AND A --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: And,
right,
}) if is_true(&left) => *right,
// false AND A --> false (even if A is null)
Expr::BinaryExpr(BinaryExpr {
left,
op: And,
right: _,
}) if is_false(&left) => *left,
// A AND true --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: And,
right,
}) if is_true(&right) => *left,
// A AND false --> false (even if A is null)
Expr::BinaryExpr(BinaryExpr {
left: _,
op: And,
right,
}) if is_false(&right) => *right,
// A AND !A ---> false (if A not nullable)
Expr::BinaryExpr(BinaryExpr {
left,
op: And,
right,
}) if is_not_of(&right, &left) && !info.nullable(&left)? => {
Expr::Literal(ScalarValue::Boolean(Some(false)))
}
// !A AND A ---> false (if A not nullable)
Expr::BinaryExpr(BinaryExpr {
left,
op: And,
right,
}) if is_not_of(&left, &right) && !info.nullable(&right)? => {
Expr::Literal(ScalarValue::Boolean(Some(false)))
}
// (..A..) AND A --> (..A..)
Expr::BinaryExpr(BinaryExpr {
left,
op: And,
right,
}) if expr_contains(&left, &right, And) => *left,
// A AND (..A..) --> (..A..)
Expr::BinaryExpr(BinaryExpr {
left,
op: And,
right,
}) if expr_contains(&right, &left, And) => *right,
// A AND (A OR B) --> A (if B not null)
Expr::BinaryExpr(BinaryExpr {
left,
op: And,
right,
}) if !info.nullable(&right)? && is_op_with(Or, &right, &left) => *left,
// (A OR B) AND A --> A (if B not null)
Expr::BinaryExpr(BinaryExpr {
left,
op: And,
right,
}) if !info.nullable(&left)? && is_op_with(Or, &left, &right) => *right,
//
// Rules for Multiply
//
// A * 1 --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: Multiply,
right,
}) if is_one(&right) => *left,
// 1 * A --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: Multiply,
right,
}) if is_one(&left) => *right,
// A * null --> null
Expr::BinaryExpr(BinaryExpr {
left: _,
op: Multiply,
right,
}) if is_null(&right) => *right,
// null * A --> null
Expr::BinaryExpr(BinaryExpr {
left,
op: Multiply,
right: _,
}) if is_null(&left) => *left,
// A * 0 --> 0 (if A is not null)
Expr::BinaryExpr(BinaryExpr {
left,
op: Multiply,
right,
}) if !info.nullable(&left)? && is_zero(&right) => *right,
// 0 * A --> 0 (if A is not null)
Expr::BinaryExpr(BinaryExpr {
left,
op: Multiply,
right,
}) if !info.nullable(&right)? && is_zero(&left) => *left,
//
// Rules for Divide
//
// A / 1 --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: Divide,
right,
}) if is_one(&right) => *left,
// null / A --> null
Expr::BinaryExpr(BinaryExpr {
left,
op: Divide,
right: _,
}) if is_null(&left) => *left,
// A / null --> null
Expr::BinaryExpr(BinaryExpr {
left: _,
op: Divide,
right,
}) if is_null(&right) => *right,
// 0 / 0 -> null
Expr::BinaryExpr(BinaryExpr {
left,
op: Divide,
right,
}) if is_zero(&left) && is_zero(&right) => {
Expr::Literal(ScalarValue::Int32(None))
}
// A / 0 -> DivideByZero Error
Expr::BinaryExpr(BinaryExpr {
left,
op: Divide,
right,
}) if !info.nullable(&left)? && is_zero(&right) => {
return Err(DataFusionError::ArrowError(ArrowError::DivideByZero));
}
//
// Rules for Modulo
//
// A % null --> null
Expr::BinaryExpr(BinaryExpr {
left: _,
op: Modulo,
right,
}) if is_null(&right) => *right,
// null % A --> null
Expr::BinaryExpr(BinaryExpr {
left,
op: Modulo,
right: _,
}) if is_null(&left) => *left,
// A % 1 --> 0
Expr::BinaryExpr(BinaryExpr {
left,
op: Modulo,
right,
}) if !info.nullable(&left)? && is_one(&right) => lit(0),
// A % 0 --> DivideByZero Error
Expr::BinaryExpr(BinaryExpr {
left,
op: Modulo,
right,
}) if !info.nullable(&left)? && is_zero(&right) => {
return Err(DataFusionError::ArrowError(ArrowError::DivideByZero));
}
//
// Rules for BitwiseAnd
//
// A & null -> null
Expr::BinaryExpr(BinaryExpr {
left: _,
op: BitwiseAnd,
right,
}) if is_null(&right) => *right,
// null & A -> null
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseAnd,
right: _,
}) if is_null(&left) => *left,
// A & 0 -> 0 (if A not nullable)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseAnd,
right,
}) if !info.nullable(&left)? && is_zero(&right) => *right,
// 0 & A -> 0 (if A not nullable)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseAnd,
right,
}) if !info.nullable(&right)? && is_zero(&left) => *left,
// !A & A -> 0 (if A not nullable)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseAnd,
right,
}) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left)?)?)
}
// A & !A -> 0 (if A not nullable)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseAnd,
right,
}) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left)?)?)
}
// (..A..) & A --> (..A..)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseAnd,
right,
}) if expr_contains(&left, &right, BitwiseAnd) => *left,
// A & (..A..) --> (..A..)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseAnd,
right,
}) if expr_contains(&right, &left, BitwiseAnd) => *right,
// A & (A | B) --> A (if B not null)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseAnd,
right,
}) if !info.nullable(&right)? && is_op_with(BitwiseOr, &right, &left) => {
*left
}
// (A | B) & A --> A (if B not null)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseAnd,
right,
}) if !info.nullable(&left)? && is_op_with(BitwiseOr, &left, &right) => {
*right
}
//
// Rules for BitwiseOr
//
// A | null -> null
Expr::BinaryExpr(BinaryExpr {
left: _,
op: BitwiseOr,
right,
}) if is_null(&right) => *right,
// null | A -> null
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseOr,
right: _,
}) if is_null(&left) => *left,
// A | 0 -> A (even if A is null)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseOr,
right,
}) if is_zero(&right) => *left,
// 0 | A -> A (even if A is null)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseOr,
right,
}) if is_zero(&left) => *right,
// !A | A -> -1 (if A not nullable)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseOr,
right,
}) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?)
}
// A | !A -> -1 (if A not nullable)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseOr,
right,
}) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?)
}
// (..A..) | A --> (..A..)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseOr,
right,
}) if expr_contains(&left, &right, BitwiseOr) => *left,
// A | (..A..) --> (..A..)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseOr,
right,
}) if expr_contains(&right, &left, BitwiseOr) => *right,
// A | (A & B) --> A (if B not null)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseOr,
right,
}) if !info.nullable(&right)? && is_op_with(BitwiseAnd, &right, &left) => {
*left
}
// (A & B) | A --> A (if B not null)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseOr,
right,
}) if !info.nullable(&left)? && is_op_with(BitwiseAnd, &left, &right) => {
*right
}
//
// Rules for BitwiseXor
//
// A ^ null -> null
Expr::BinaryExpr(BinaryExpr {
left: _,
op: BitwiseXor,
right,
}) if is_null(&right) => *right,
// null ^ A -> null
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseXor,
right: _,
}) if is_null(&left) => *left,
// A ^ 0 -> A (if A not nullable)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseXor,
right,
}) if !info.nullable(&left)? && is_zero(&right) => *left,
// 0 ^ A -> A (if A not nullable)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseXor,
right,
}) if !info.nullable(&right)? && is_zero(&left) => *right,
// !A ^ A -> -1 (if A not nullable)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseXor,
right,
}) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?)
}
// A ^ !A -> -1 (if A not nullable)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseXor,
right,
}) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?)
}
// (..A..) ^ A --> (the expression without A, if number of A is odd, otherwise one A)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseXor,
right,
}) if expr_contains(&left, &right, BitwiseXor) => {
let expr = delete_xor_in_complex_expr(&left, &right, false);
if expr == *right {
Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&right)?)?)
} else {
expr
}
}
// A ^ (..A..) --> (the expression without A, if number of A is odd, otherwise one A)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseXor,
right,
}) if expr_contains(&right, &left, BitwiseXor) => {
let expr = delete_xor_in_complex_expr(&right, &left, true);
if expr == *left {
Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left)?)?)
} else {
expr
}
}
//
// Rules for BitwiseShiftRight
//
// A >> null -> null
Expr::BinaryExpr(BinaryExpr {
left: _,
op: BitwiseShiftRight,
right,
}) if is_null(&right) => *right,
// null >> A -> null
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseShiftRight,
right: _,
}) if is_null(&left) => *left,
// A >> 0 -> A (even if A is null)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseShiftRight,
right,
}) if is_zero(&right) => *left,
//
// Rules for BitwiseShiftRight
//
// A << null -> null
Expr::BinaryExpr(BinaryExpr {
left: _,
op: BitwiseShiftLeft,
right,
}) if is_null(&right) => *right,
// null << A -> null
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseShiftLeft,
right: _,
}) if is_null(&left) => *left,
// A << 0 -> A (even if A is null)
Expr::BinaryExpr(BinaryExpr {
left,
op: BitwiseShiftLeft,
right,
}) if is_zero(&right) => *left,
//
// Rules for Not
//
Expr::Not(inner) => negate_clause(*inner),
//
// Rules for Negative
//
Expr::Negative(inner) => distribute_negation(*inner),
//
// Rules for Case
//
// CASE
// WHEN X THEN A
// WHEN Y THEN B
// ...
// ELSE Q
// END
//
// ---> (X AND A) OR (Y AND B AND NOT X) OR ... (NOT (X OR Y) AND Q)
//
// Note: the rationale for this rewrite is that the expr can then be further
// simplified using the existing rules for AND/OR
Expr::Case(Case {
expr: None,
when_then_expr,
else_expr,
}) if !when_then_expr.is_empty()
&& when_then_expr.len() < 3 // The rewrite is O(n!) so limit to small number
&& info.is_boolean_type(&when_then_expr[0].1)? =>
{
// The disjunction of all the when predicates encountered so far
let mut filter_expr = lit(false);
// The disjunction of all the cases
let mut out_expr = lit(false);
for (when, then) in when_then_expr {
let case_expr = when
.as_ref()
.clone()
.and(filter_expr.clone().not())
.and(*then);
out_expr = out_expr.or(case_expr);
filter_expr = filter_expr.or(*when);
}
if let Some(else_expr) = else_expr {
let case_expr = filter_expr.not().and(*else_expr);
out_expr = out_expr.or(case_expr);
}
// Do a first pass at simplification
out_expr.rewrite(self)?
}
// log
Expr::ScalarFunction(ScalarFunction {
fun: BuiltinScalarFunction::Log,
args,
}) => simpl_log(args, <&S>::clone(&info))?,
// power
Expr::ScalarFunction(ScalarFunction {
fun: BuiltinScalarFunction::Power,
args,
}) => simpl_power(args, <&S>::clone(&info))?,
// concat
Expr::ScalarFunction(ScalarFunction {
fun: BuiltinScalarFunction::Concat,
args,
}) => simpl_concat(args)?,
// concat_ws
Expr::ScalarFunction(ScalarFunction {
fun: BuiltinScalarFunction::ConcatWithSeparator,
args,
}) => match &args[..] {
[delimiter, vals @ ..] => simpl_concat_ws(delimiter, vals)?,
_ => Expr::ScalarFunction(ScalarFunction::new(
BuiltinScalarFunction::ConcatWithSeparator,
args,
)),
},
//
// Rules for Between
//
// a between 3 and 5 --> a >= 3 AND a <=5
// a not between 3 and 5 --> a < 3 OR a > 5
Expr::Between(between) => {
if between.negated {
let l = *between.expr.clone();
let r = *between.expr;
or(l.lt(*between.low), r.gt(*between.high))
} else {
and(
between.expr.clone().gt_eq(*between.low),
between.expr.lt_eq(*between.high),
)
}
}
//
// Rules for regexes
//
Expr::BinaryExpr(BinaryExpr {
left,
op: op @ (RegexMatch | RegexNotMatch | RegexIMatch | RegexNotIMatch),
right,
}) => simplify_regex_expr(left, op, right)?,
// Rules for Like
Expr::Like(Like {
expr,
pattern,
negated,
escape_char: _,
case_insensitive: _,
}) if !is_null(&expr)
&& matches!(
pattern.as_ref(),
Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) if pattern_str == "%"
) =>
{
lit(!negated)
}
// a is not null/unknown --> true (if a is not nullable)
Expr::IsNotNull(expr) | Expr::IsNotUnknown(expr)
if !info.nullable(&expr)? =>
{
lit(true)
}
// a is null/unknown --> false (if a is not nullable)
Expr::IsNull(expr) | Expr::IsUnknown(expr) if !info.nullable(&expr)? => {
lit(false)
}
// no additional rewrites possible
expr => expr,
};
Ok(new_expr)
}