in datafusion/optimizer/src/rewrite_disjunctive_predicate.rs [269:354]
/// Factors out the conjuncts shared by every argument of an OR.
///
/// `or_predicates` holds the disjuncts of a single OR. Any predicate that
/// occurs in all of them is pulled in front of the OR, for example:
///
/// * `(A AND B) OR (A AND C)` becomes `A AND (B OR C)`
/// * `(A AND B) OR A` becomes `A`
///
/// When the disjuncts share nothing, the input is returned unchanged, wrapped
/// in a `Predicate::Or`.
fn delete_duplicate_predicates(or_predicates: &[Predicate]) -> Predicate {
    // Pick the candidate factors: the conjuncts of the shortest AND disjunct.
    // The first disjunct that is not an AND is a single-element candidate and
    // cannot be beaten, so the scan stops there.
    let mut candidate: Vec<Predicate> = Vec::new();
    for disjunct in or_predicates {
        if let Predicate::And { args } = disjunct {
            if candidate.is_empty() || args.len() < candidate.len() {
                candidate = args.clone();
            }
        } else {
            candidate = vec![disjunct.clone()];
            break;
        }
    }

    // Collapse runs of equal neighbours (`Vec::dedup` only merges adjacent
    // duplicates).
    candidate.dedup();

    // Keep only the candidates that appear in every disjunct: as one of the
    // conjuncts of an AND, or as the whole disjunct otherwise.
    let mut common: Vec<Predicate> = candidate
        .into_iter()
        .filter(|expr| {
            or_predicates.iter().all(|disjunct| match disjunct {
                Predicate::And { args } => args.contains(expr),
                other => other == expr,
            })
        })
        .collect();

    // Nothing is shared: hand the OR back as it came in.
    if common.is_empty() {
        return Predicate::Or {
            args: or_predicates.to_vec(),
        };
    }

    // Strip the common factors from each disjunct and gather what is left.
    // If some disjunct consists of nothing but common factors, the whole OR
    // collapses to those factors (`(A AND B) OR A` is just `A`), so every
    // leftover gathered so far is discarded.
    let mut residuals: Vec<Predicate> = Vec::new();
    let mut absorbed = false;
    for disjunct in or_predicates {
        match disjunct {
            Predicate::And { args } => {
                let rest: Vec<Predicate> = args
                    .iter()
                    .filter(|conjunct| !common.contains(*conjunct))
                    .cloned()
                    .collect();
                match rest.len() {
                    0 => {
                        absorbed = true;
                        break;
                    }
                    // A lone leftover is stored bare rather than as a
                    // one-argument AND.
                    1 => residuals.extend(rest),
                    _ => residuals.push(Predicate::And { args: rest }),
                }
            }
            other => {
                // A non-AND disjunct that is itself a common factor absorbs
                // the whole OR. Otherwise it contributes nothing here.
                // NOTE(review): the "otherwise" case looks unreachable, since
                // a non-empty `common` together with a non-AND disjunct
                // implies that disjunct is the only common factor.
                if common.contains(other) {
                    absorbed = true;
                    break;
                }
            }
        }
    }
    if absorbed {
        residuals.clear();
    }

    // Re-attach the leftovers behind the common factors as a single term.
    match residuals.len() {
        0 => {}
        1 => common.extend(residuals),
        _ => common.push(Predicate::Or {
            args: flatten_or_predicates(residuals),
        }),
    }

    // `common` is non-empty here; a single term needs no AND wrapper.
    if common.len() == 1 {
        common.remove(0)
    } else {
        Predicate::And {
            args: flatten_and_predicates(common),
        }
    }
}