in hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/ComplexUnnestToProductRule.java [155:304]
private boolean findPlanPartition(AbstractLogicalOperator op, HashSet<LogicalVariable> innerUsedVars,
HashSet<LogicalVariable> outerUsedVars, List<ILogicalOperator> innerOps, List<ILogicalOperator> outerOps,
List<ILogicalOperator> topSelects, boolean belowSecondUnnest) throws AlgebricksException {
if (belowSecondUnnest && innerUsedVars.isEmpty()) {
// Trivially joinable.
return true;
}
if (!belowSecondUnnest) {
// Bail on the following operators.
switch (op.getOperatorTag()) {
case AGGREGATE:
case SUBPLAN:
case GROUP:
case UNNEST_MAP:
return false;
}
}
switch (op.getOperatorTag()) {
case UNNEST:
case DATASOURCESCAN: {
// We may have reached this state by descending through a subplan.
outerOps.add(op);
return true;
}
case INNERJOIN:
case LEFTOUTERJOIN: {
// Make sure that no variables that are live under this join are needed by the inner.
List<LogicalVariable> liveVars = new ArrayList<LogicalVariable>();
VariableUtilities.getLiveVariables(op, liveVars);
for (LogicalVariable liveVar : liveVars) {
if (innerUsedVars.contains(liveVar)) {
return false;
}
}
outerOps.add(op);
return true;
}
case SELECT: {
// Remember this select to pulling it above the join.
if (innerUsedVars.isEmpty()) {
outerOps.add(op);
} else {
topSelects.add(op);
}
break;
}
case PROJECT: {
// Throw away projects from the plan since we are pulling selects up.
break;
}
case EMPTYTUPLESOURCE:
case NESTEDTUPLESOURCE: {
if (belowSecondUnnest) {
// We have successfully partitioned the plan into independent parts to be plugged into the join.
return true;
} else {
// We could not find a second unnest or a join.
return false;
}
}
default: {
if (op.getInputs().size() > 1) {
return false;
}
// The inner is trivially independent.
if (!belowSecondUnnest && innerUsedVars.isEmpty()) {
outerOps.add(op);
break;
}
// Examine produced vars to determine which partition uses them.
List<LogicalVariable> producedVars = new ArrayList<LogicalVariable>();
VariableUtilities.getProducedVariables(op, producedVars);
int outerMatches = 0;
int innerMatches = 0;
for (LogicalVariable producedVar : producedVars) {
if (outerUsedVars.contains(producedVar)) {
outerMatches++;
}
if (innerUsedVars.contains(producedVar)) {
innerMatches++;
}
}
HashSet<LogicalVariable> targetUsedVars = null;
if (outerMatches == producedVars.size() && !producedVars.isEmpty()) {
// All produced vars used by outer partition.
outerOps.add(op);
targetUsedVars = outerUsedVars;
}
if (innerMatches == producedVars.size() && !producedVars.isEmpty()) {
// All produced vars used by inner partition.
innerOps.add(op);
targetUsedVars = innerUsedVars;
}
if (innerMatches == 0 && outerMatches == 0) {
// Op produces variables that are not used in the part of the plan we've seen (or it doesn't produce any vars).
// Try to figure out where it belongs by analyzing the used variables.
List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
VariableUtilities.getUsedVariables(op, usedVars);
for (LogicalVariable usedVar : usedVars) {
boolean canBreak = false;
if (outerUsedVars.contains(usedVar)) {
outerOps.add(op);
targetUsedVars = outerUsedVars;
canBreak = true;
}
if (innerUsedVars.contains(usedVar)) {
innerOps.add(op);
targetUsedVars = innerUsedVars;
canBreak = true;
}
if (canBreak) {
break;
}
}
} else if (innerMatches != 0 && outerMatches != 0) {
// The current operator produces variables that are used by both partitions, so the inner and outer are not independent and, therefore, we cannot create a join.
// TODO: We may still be able to split the operator to create a viable partitioning.
return false;
}
// TODO: For now we bail here, but we could remember such ops and determine their target partition at a later point.
if (targetUsedVars == null) {
return false;
}
// Update used variables of partition that op belongs to.
if (op.hasNestedPlans() && op.getOperatorTag() != LogicalOperatorTag.SUBPLAN) {
AbstractOperatorWithNestedPlans opWithNestedPlans = (AbstractOperatorWithNestedPlans) op;
opWithNestedPlans.getUsedVariablesExceptNestedPlans(targetUsedVars);
} else {
VariableUtilities.getUsedVariables(op, targetUsedVars);
}
break;
}
}
if (!op.hasInputs()) {
if (!belowSecondUnnest) {
// We could not find a second unnest or a join.
return false;
} else {
// We have successfully partitioned the plan into independent parts to be plugged into the join.
return true;
}
}
return findPlanPartition((AbstractLogicalOperator) op.getInputs().get(0).getValue(), innerUsedVars,
outerUsedVars, innerOps, outerOps, topSelects, belowSecondUnnest);
}