in src/org/apache/pig/newplan/logical/rules/PushUpFilter.java [75:161]
public boolean check(OperatorPlan matched) throws FrontendException {
    // Decides whether the filter at the head of the matched pattern may be
    // moved above the operator returned by findNonFilterPredecessor().
    // Returns true when the move is allowed, false otherwise.
    final Operator filterOp = matched.getSources().get(0);
    final Operator target = findNonFilterPredecessor(filterOp);
    if (target == null) {
        return false;
    }

    // Sort and union never block the move.
    final boolean alwaysAllowed = target instanceof LOUnion || target instanceof LOSort;
    if (alwaysAllowed) {
        return true;
    }

    // These operators are never handled by this rule. For LOForEach the
    // rule FilterAboveForeach performs the optimization instead.
    final boolean neverAllowed =
            target instanceof LOForEach
            || target instanceof LONative
            || target instanceof LOLimit
            || target instanceof LOSplitOutput
            || target instanceof LOSplit
            || target instanceof LOFilter
            || target instanceof LOStream
            || target instanceof LOStore
            || target instanceof LOLoad;
    if (neverAllowed) {
        return false;
    }

    final LOFilter filterNode = (LOFilter) filterOp;
    final List<Operator> inputs = currentPlan.getPredecessors(target);
    final LogicalExpressionPlan condition = filterNode.getFilterPlan();

    // A condition containing a non-deterministic UDF is left where it is.
    if (OptimizerUtils.planHasNonDeterministicUdf(condition)) {
        return false;
    }

    // With only deterministic expressions the filter may go above a distinct.
    if (target instanceof LODistinct) {
        return true;
    }

    // Every uid referenced by the filter condition.
    final Set<Long> usedUids = collectUidFromExpPlan(condition);

    if (target instanceof LOCogroup) {
        final LOCogroup cogroup = (LOCogroup) target;
        final boolean pushable;
        if (inputs.size() == 1) {
            // Single-input cogroup: fine when the sole input supplies
            // every uid the condition uses.
            pushable = hasAll((LogicalRelationalOperator) inputs.get(0), usedUids);
        } else {
            // Multi-input cogroup: uids cannot decide this, so require a
            // single key and a UDF-free condition (only a UDF can take a
            // bag field as input).
            pushable = cogroup.getExpressionPlans().get(0).size() == 1
                    && !containUDF(condition);
        }
        if (pushable) {
            return true;
        }
    }

    // Only the multi-input cross/join operators remain worth examining;
    // they need a per-input check.
    final boolean isCross = target instanceof LOCross;
    final boolean isJoin = target instanceof LOJoin;
    if (!isCross && !isJoin) {
        return false;
    }

    boolean[] innerFlags = null;
    boolean everyFlagSet = true;   // all inner flags true => inner join
    if (isJoin) {
        innerFlags = ((LOJoin) target).getInnerFlags();
        boolean anyFlagSet = false;
        for (boolean flag : innerFlags) {
            anyFlagSet |= flag;
            everyFlagSet &= flag;
        }
        // No inner flag set at all means a full outer join: never push.
        if (!anyFlagSet) {
            return false;
        }
    }

    int position = 0;
    for (Operator input : inputs) {
        // The input must provide every uid first; only then are the join
        // flags consulted. For a non-inner join the input's own flag has
        // to be set (per the original note, innerFlag==true marks the
        // outer-join side of an LOJoin, which has the opposite semantics).
        if (hasAll((LogicalRelationalOperator) input, usedUids)
                && (isCross || everyFlagSet || innerFlags[position])) {
            return true;
        }
        position++;
    }
    return false;
}