in datafusion/optimizer/src/eliminate_outer_join.rs [166:295]
/// Walks a predicate expression and appends to `non_nullable_cols` every
/// column the predicate is treated as rejecting NULL for.
///
/// # Arguments
///
/// * `expr` - the (sub)expression being inspected.
/// * `non_nullable_cols` - output list; columns are appended, never removed,
///   and duplicates are not filtered out.
/// * `left_schema` / `right_schema` - schemas of the two join inputs, used
///   only to decide whether two columns belong to the same side.
/// * `top_level` - `true` while the walk has only passed through `AND`/`OR`
///   nodes; every other recursion below resets it to `false`.
///
/// # Errors
///
/// No branch here constructs an error itself; an `Err` can only be one
/// propagated from a recursive call.
fn extract_non_nullable_columns(
    expr: &Expr,
    non_nullable_cols: &mut Vec<Column>,
    left_schema: &Arc<DFSchema>,
    right_schema: &Arc<DFSchema>,
    top_level: bool,
) -> Result<()> {
    match expr {
        // A bare column reference is recorded as-is.
        Expr::Column(col) => non_nullable_cols.push(col.clone()),
        Expr::BinaryExpr(BinaryExpr { left, op, right }) => match op {
            // A comparison with a NULL operand does not evaluate to true, so
            // the columns on both sides are collected (no longer top level).
            Operator::Eq
            | Operator::NotEq
            | Operator::Lt
            | Operator::LtEq
            | Operator::Gt
            | Operator::GtEq => {
                for operand in [left, right] {
                    extract_non_nullable_columns(
                        operand,
                        non_nullable_cols,
                        left_schema,
                        right_schema,
                        false,
                    )?;
                }
            }
            // A top-level conjunction: both conjuncts must hold, so each one
            // contributes its columns directly to the caller's list.
            Operator::And if top_level => {
                for operand in [left, right] {
                    extract_non_nullable_columns(
                        operand,
                        non_nullable_cols,
                        left_schema,
                        right_schema,
                        top_level,
                    )?;
                }
            }
            // `OR`, and any `AND` that is not at top level (for example
            // `not (c1 < 10 and c2 > 100)`), which is handled like `OR`.
            Operator::And | Operator::Or => {
                let mut lhs_cols: Vec<Column> = Vec::new();
                let mut rhs_cols: Vec<Column> = Vec::new();
                extract_non_nullable_columns(
                    left,
                    &mut lhs_cols,
                    left_schema,
                    right_schema,
                    top_level,
                )?;
                extract_non_nullable_columns(
                    right,
                    &mut rhs_cols,
                    left_schema,
                    right_schema,
                    top_level,
                )?;

                // Two columns are on the same side when one join input's
                // schema contains both of them.
                let same_side = |a: &Column, b: &Column| {
                    (left_schema.has_column(a) && left_schema.has_column(b))
                        || (right_schema.has_column(a) && right_schema.has_column(b))
                };

                // select ... from a left join b where b.c1 ... or b.c2 ...
                //   -> both branches constrain `b`; can become an inner join.
                // select ... from a left join b where a.c1 ... or b.c2 ...
                //   -> the branches constrain different relations; cannot.
                // So a column from the left branch is kept only when some
                // column from the right branch comes from the same relation.
                // If either list is empty nothing is added.
                non_nullable_cols.extend(
                    lhs_cols
                        .iter()
                        .filter(|&lhs| rhs_cols.iter().any(|rhs| same_side(lhs, rhs)))
                        .cloned(),
                );
            }
            // Every other binary operator contributes nothing.
            _ => {}
        },
        // `IS NOT NULL` is only inspected at top level; when nested it falls
        // through to the catch-all arm and contributes nothing.
        Expr::IsNotNull(inner) if top_level => {
            extract_non_nullable_columns(
                inner,
                non_nullable_cols,
                left_schema,
                right_schema,
                false,
            )?;
        }
        // Negation and casts are looked through, but their operand is no
        // longer considered top level.
        Expr::Not(inner)
        | Expr::Cast(Cast { expr: inner, .. })
        | Expr::TryCast(TryCast { expr: inner, .. }) => {
            extract_non_nullable_columns(
                inner,
                non_nullable_cols,
                left_schema,
                right_schema,
                false,
            )?;
        }
        // Any other expression kind contributes nothing.
        _ => {}
    }
    Ok(())
}