in ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java [609:769]
/**
 * Recursively enumerates row combinations across the join aliases, one alias per
 * recursion level, and emits output rows once the last alias is reached.
 *
 * <p>At each level the rows stored for {@code order[aliasNum]} are iterated. For every
 * row the skip vector of this level is rebuilt from the previous level's vector, the
 * join-type specific check (inner / semi / anti / left / right / full outer) decides
 * whether the row is kept, and then the method either recurses into
 * {@code aliasNum + 1} or, at {@code numAliases - 1}, calls
 * {@code createForwardJoinObject}. After the loop, extra rows are emitted for anti
 * joins and for outer joins that require post evaluation.
 *
 * @param aliasNum     position of the alias handled by this call; {@code condn[aliasNum - 1]}
 *                     and {@code skipVectors[aliasNum - 1]} are read, so it must be at least 1
 * @param allLeftFirst propagated to the next level as {@code allLeftFirst && rightFirst};
 *                     i.e. it stays true only while each enclosing level is on the first
 *                     pass of its own loop (initial value comes from the caller, not shown here)
 * @param allLeftNull  propagated to the next level as {@code allLeftNull && rightNull};
 *                     i.e. it stays true only while each enclosing level is positioned on
 *                     its {@code dummyObj} row (initial value comes from the caller, not shown here)
 * @throws HiveException declared by this method; presumably raised by the helper calls
 *                       (join checks, forwarding) — their bodies are not visible here
 */
private void genObject(int aliasNum, boolean allLeftFirst, boolean allLeftNull)
throws HiveException {
// Join condition linking this alias to an earlier one; left/right index into the skip vector.
JoinCondDesc joinCond = condn[aliasNum - 1];
int type = joinCond.getType();
int left = joinCond.getLeft();
int right = joinCond.getRight();
// At the second-to-last level, if the last join is RIGHT/FULL OUTER, start with a fresh map
// so the last level can record right rows that have not yet produced output.
if (needsPostEvaluation && aliasNum == numAliases - 2) {
int nextType = condn[aliasNum].getType();
if (nextType == JoinDesc.RIGHT_OUTER_JOIN || nextType == JoinDesc.FULL_OUTER_JOIN) {
// Initialize container to use for storing tuples before emitting them
rowContainerPostFilteredOuterJoin = new HashMap<>();
}
}
// skip is this level's working vector; prevSkip is the state left by the previous level.
boolean[] skip = skipVectors[aliasNum];
boolean[] prevSkip = skipVectors[aliasNum - 1];
// search for match in the rhs table
AbstractRowContainer<List<Object>> aliasRes = storage[order[aliasNum]];
// Set once the last level sees a combination that is not "all left null and right null".
boolean needToProduceLeftRow = false;
// Set only in the needsPostEvaluation branch, from createForwardJoinObject's return value.
boolean producedRow = false;
// Requests loop termination after the current iteration finishes.
boolean done = false;
// When true, the loop's update clause keeps the same rightObj for one more pass.
boolean loopAgain = false;
// For FULL OUTER joins: attempt the left-outer check until it succeeds once (see below).
boolean tryLOForFO = type == JoinDesc.FULL_OUTER_JOIN;
// True only during the first pass of the loop; cleared by the update clause.
boolean rightFirst = true;
AbstractRowContainer.RowIterator<List<Object>> iter = aliasRes.rowIter();
// Pass counter; used below as the key into rowContainerPostFilteredOuterJoin.
int pos = 0;
// Update clause order: pick the next row (or keep the current one if loopAgain), then
// reset rightFirst and loopAgain, then advance pos. It also runs after every `continue`.
// NOTE(review): pos advances on the loopAgain pass too, so the same rightObj is seen under
// two consecutive pos values — confirm this is intended for the map keys used below.
for (List<Object> rightObj = iter.first(); !done && rightObj != null;
rightObj = loopAgain ? rightObj : iter.next(), rightFirst = loopAgain = false, pos++) {
// Keep a copy of the skip vector and update the bit for current alias only in the loop.
System.arraycopy(prevSkip, 0, skip, 0, prevSkip.length);
// Identity comparison: the row is this alias's dummy placeholder object.
boolean rightNull = rightObj == dummyObj[aliasNum];
if (hasFilter(order[aliasNum])) {
filterTags[aliasNum] = getFilterTag(rightObj);
}
skip[right] = rightNull;
if (type == JoinDesc.INNER_JOIN) {
// Return value intentionally unused here; innerJoin presumably updates skip in place.
innerJoin(skip, left, right);
} else if (type == JoinDesc.LEFT_SEMI_JOIN) {
if (innerJoin(skip, left, right)) {
// if left-semi-join found a match and we do not have any additional predicates,
// skipping the rest of the rows in the rhs table of the semijoin
done = !needsPostEvaluation;
}
} else if (type == JoinDesc.ANTI_JOIN) {
if (innerJoin(skip, left, right)) {
// if inner join found a match then the condition is not matched for anti join, so we can skip rest of the
// record. But if there is some post evaluation we have to handle that.
done = !needsPostEvaluation;
}
} else if (type == JoinDesc.LEFT_OUTER_JOIN ||
(type == JoinDesc.FULL_OUTER_JOIN && rightNull)) {
// Three-way result: negative skips this row, positive stops after this row,
// zero processes this row and keeps iterating.
int result = leftOuterJoin(skip, left, right);
if (result < 0) {
continue;
}
done = result > 0;
} else if (type == JoinDesc.RIGHT_OUTER_JOIN ||
(type == JoinDesc.FULL_OUTER_JOIN && allLeftNull)) {
// With allLeftFirst the right-outer check is applied, otherwise the inner check;
// the row is skipped when the applicable check returns false.
if (allLeftFirst && !rightOuterJoin(skip, left, right) ||
!allLeftFirst && !innerJoin(skip, left, right)) {
continue;
}
} else if (type == JoinDesc.FULL_OUTER_JOIN) {
// Remaining FULL OUTER case: right row is not the dummy and allLeftNull is false.
if (tryLOForFO && leftOuterJoin(skip, left, right) > 0) {
// Left-outer check succeeded: revisit the same rightObj once more only when
// allLeftFirst is set, otherwise finish after this pass. Not attempted again
// within this call.
loopAgain = allLeftFirst;
done = !loopAgain;
tryLOForFO = false;
} else if (allLeftFirst && !rightOuterJoin(skip, left, right) ||
!allLeftFirst && !innerJoin(skip, left, right)) {
continue;
}
}
// Row accepted for this level: record it as this alias's part of the combination.
intermediate[aliasNum] = rightObj;
if (aliasNum == numAliases - 1) {
// Last alias: emit, unless every earlier level and this row are dummy placeholders.
if (!(allLeftNull && rightNull)) {
needToProduceLeftRow = true;
if (needsPostEvaluation) {
// This is only executed for outer joins with residual filters
// NOTE(review): LEFT_SEMI_JOIN and ANTI_JOIN are also handled in this branch
// (see the type checks below) — confirm the comment above is still accurate.
boolean forward = createForwardJoinObject(skipVectors[numAliases - 1],
type == JoinDesc.ANTI_JOIN);
producedRow |= forward;
// NOTE(review): this assignment replaces any value `done` received earlier in
// this iteration (e.g. from the left-outer branch) — confirm that is intended.
done = (type == JoinDesc.LEFT_SEMI_JOIN) && forward;
if (!rightNull &&
(type == JoinDesc.RIGHT_OUTER_JOIN || type == JoinDesc.FULL_OUTER_JOIN)) {
if (forward) {
// This record produced a result this time, remove it from the storage
// as it will not need to produce a result with NULL values anymore
// (the key is kept with a null value; null values are skipped when the map is drained)
rowContainerPostFilteredOuterJoin.put(pos, null);
} else {
// We need to store this record (if it is not done yet) in case
// we should produce a result
if (!rowContainerPostFilteredOuterJoin.containsKey(pos)) {
// Snapshot only this alias's slice of forwardCache.
Object[] row = Arrays.copyOfRange(forwardCache, offsets[aliasNum], offsets[aliasNum + 1]);
rowContainerPostFilteredOuterJoin.put(pos, row);
}
}
}
} else {
// No post evaluation: the return value is ignored, so producedRow stays unchanged.
createForwardJoinObject(skipVectors[numAliases - 1], type == JoinDesc.ANTI_JOIN);
}
}
} else {
// recursively call the join the other rhs tables
genObject(aliasNum + 1, allLeftFirst && rightFirst, allLeftNull && rightNull);
}
}
// For anti join, we should proceed to emit records if the right side is empty or not matching.
// NOTE(review): producedRow is only updated when needsPostEvaluation is true, so without
// post evaluation this block runs for every ANTI_JOIN call that reaches here — presumably
// the caller filters out matched rows beforehand; confirm.
if (type == JoinDesc.ANTI_JOIN && !producedRow) {
// Rebuild the skip vector with the right side marked as skipped.
System.arraycopy(prevSkip, 0, skip, 0, prevSkip.length);
skip[right] = true;
if (aliasNum == numAliases - 1) {
createForwardJoinObjectForAntiJoin(skipVectors[numAliases - 1]);
} else {
genObject(aliasNum + 1, allLeftFirst, allLeftNull);
}
}
// Consolidation for outer joins
if (needsPostEvaluation && aliasNum == numAliases - 1 &&
needToProduceLeftRow && !producedRow && !allLeftNull) {
if (type == JoinDesc.LEFT_OUTER_JOIN || type == JoinDesc.FULL_OUTER_JOIN) {
// If it is a LEFT / FULL OUTER JOIN and the left record did not produce
// results, we need to take that record, replace the right side with NULL
// values, and produce the records
int i = numAliases - 1;
// Null out only the last alias's slice of forwardCache.
for (int j = offsets[i]; j < offsets[i + 1]; j++) {
forwardCache[j] = null;
}
internalForward(forwardCache, outputObjInspector);
countAfterReport = 0;
}
} else if (needsPostEvaluation && aliasNum == numAliases - 2) {
int nextType = condn[aliasNum].getType();
if (nextType == JoinDesc.RIGHT_OUTER_JOIN || nextType == JoinDesc.FULL_OUTER_JOIN) {
// If it is a RIGHT / FULL OUTER JOIN, we need to iterate through the row container
// that contains all the right records that did not produce results. Then, for each
// of those records, we replace the left side with NULL values, and produce the
// records.
// Observe that we only enter this block when we have finished iterating through
// all the left and right records (aliasNum == numAliases - 2), and thus, we have
// tried to evaluate the post-filter condition on every possible combination.
// Clear the whole cache once; each saved row then overwrites only the last alias's slice.
Arrays.fill(forwardCache, null);
for (Object[] row : rowContainerPostFilteredOuterJoin.values()) {
// A null value marks a right row that already produced output.
if (row == null) {
continue;
}
System.arraycopy(row, 0, forwardCache, offsets[numAliases - 1], row.length);
internalForward(forwardCache, outputObjInspector);
countAfterReport = 0;
}
}
}
}