fn delete_duplicate_predicates()

in datafusion/optimizer/src/rewrite_disjunctive_predicate.rs [269:354]


/// Factors the conjuncts shared by every branch of a disjunction out of it.
///
/// `or_predicates` holds the branches of an OR. A branch that is a
/// `Predicate::And` contributes its `args` as conjuncts; any other branch is
/// treated as a single conjunct. Every conjunct that appears in *all* branches
/// is pulled out, so that e.g. `(A AND B) OR (A AND C)` becomes
/// `A AND (B OR C)` and `(A AND B) OR A` becomes `A`.
///
/// Returns:
/// * `Predicate::Or` over a copy of the input branches when no conjunct is
///   shared by all of them (this includes an empty input);
/// * the single shared conjunct when it is the only thing left;
/// * otherwise a `Predicate::And` of the shared conjuncts followed by the
///   residual disjunction, passed through `flatten_and_predicates`.
///
/// Each shared conjunct is emitted at most once, even when the branch used as
/// the candidate source lists it several times in non-adjacent positions.
fn delete_duplicate_predicates(or_predicates: &[Predicate]) -> Predicate {
    // Candidate conjuncts: the arguments of the shortest AND branch. A non-AND
    // branch is a single conjunct, so nothing can be shorter and the search
    // stops there.
    let mut shortest_exprs: &[Predicate] = &[];
    for or_predicate in or_predicates {
        match or_predicate {
            Predicate::And { args } => {
                if shortest_exprs.is_empty() || args.len() < shortest_exprs.len() {
                    shortest_exprs = args.as_slice();
                }
            }
            _ => {
                shortest_exprs = std::slice::from_ref(or_predicate);
                break;
            }
        }
    }

    // Keep every candidate that occurs in all branches. The `contains` guard
    // removes duplicates wherever they sit in the candidate list; `Vec::dedup`
    // would only drop adjacent repeats and let `A AND B AND A` leak `A` twice.
    let mut exist_exprs: Vec<Predicate> = Vec::new();
    for expr in shortest_exprs {
        if exist_exprs.contains(expr) {
            continue;
        }
        let in_every_branch = or_predicates.iter().all(|or_predicate| match or_predicate {
            Predicate::And { args } => args.contains(expr),
            _ => or_predicate == expr,
        });
        if in_every_branch {
            exist_exprs.push(expr.clone());
        }
    }
    if exist_exprs.is_empty() {
        // Nothing can be factored out: hand back the disjunction as it was.
        return Predicate::Or {
            args: or_predicates.to_vec(),
        };
    }

    // Rebuild the OR predicate from what is left of each branch once the
    // shared conjuncts are removed.
    // (A AND B) OR A will be optimized to A.
    let mut new_or_predicates: Vec<Predicate> = Vec::new();
    for or_predicate in or_predicates {
        match or_predicate {
            Predicate::And { args } => {
                // Clone only the conjuncts that survive the filtering.
                let remaining: Vec<Predicate> = args
                    .iter()
                    .filter(|&expr| !exist_exprs.contains(expr))
                    .cloned()
                    .collect();
                if remaining.is_empty() {
                    // This branch consists solely of shared conjuncts, so it
                    // absorbs every other branch: the residual OR disappears.
                    new_or_predicates.clear();
                    break;
                }
                if remaining.len() == 1 {
                    // A one-element AND is just that element.
                    new_or_predicates.extend(remaining);
                } else {
                    new_or_predicates.push(Predicate::And { args: remaining });
                }
            }
            _ => {
                // A non-AND branch that is itself a shared conjunct absorbs the
                // other branches as well. (With a non-empty `exist_exprs`, every
                // shared conjunct had to equal this branch, so this holds.)
                if exist_exprs.contains(or_predicate) {
                    new_or_predicates.clear();
                    break;
                }
            }
        }
    }

    // Append the residual disjunction, if any, after the shared conjuncts.
    if new_or_predicates.len() == 1 {
        exist_exprs.extend(new_or_predicates);
    } else if !new_or_predicates.is_empty() {
        exist_exprs.push(Predicate::Or {
            args: flatten_or_predicates(new_or_predicates),
        });
    }

    if exist_exprs.len() == 1 {
        // Exactly one conjunct overall: return it without an AND wrapper.
        exist_exprs.remove(0)
    } else {
        Predicate::And {
            args: flatten_and_predicates(exist_exprs),
        }
    }
}