in processing/src/main/java/org/apache/druid/extendedset/intset/ImmutableConciseSet.java [491:690]
public static ImmutableConciseSet doIntersection(Iterator<ImmutableConciseSet> sets)
{
IntList retVal = new IntList();
ArrayList<WordIterator> iterators = new ArrayList<>();
// populate priority queue
while (sets.hasNext()) {
ImmutableConciseSet set = sets.next();
if (set == null || set.isEmpty()) {
return new ImmutableConciseSet();
}
WordIterator itr = set.newWordIterator();
itr.word = itr.next();
iterators.add(itr);
}
// Keep iterators in a sorted array, because usually only a few bitsets are intersected, very rarely - a few dozens.
// Sorted array approach was benchmarked and proven to be faster than PriorityQueue (as in doUnion()) up to 100
// bitsets.
WordIterator[] theQ = iterators.toArray(new WordIterator[0]);
int qSize = theQ.length;
partialSort(theQ, qSize - 1, qSize, INTERSECTION_COMPARATOR);
int currIndex = 0;
int wordsWalkedAtSequenceEnd = Integer.MAX_VALUE;
while (qSize > 0) {
int maxChangedIndex = -1;
// grab the top element from the priority queue
WordIterator itr = theQ[0];
int word = itr.getWord();
// if a sequence has ended, we can break out because of Boolean logic
if (itr.startIndex >= wordsWalkedAtSequenceEnd) {
break;
}
// if the next word in the queue starts at a different point than where we ended off we need to create a one gap
// to fill the space
if (currIndex < itr.startIndex) {
// number of 31 bit blocks that compromise the fill minus one
addAndCompact(retVal, (ConciseSetUtils.SEQUENCE_BIT | (itr.startIndex - currIndex - 1)));
currIndex = itr.startIndex;
}
if (ConciseSetUtils.isLiteral(word)) {
// advance all other literals
int qIndex = 1;
while (qIndex < qSize &&
theQ[qIndex].startIndex == itr.startIndex) {
WordIterator i = theQ[qIndex];
int w = i.getWord();
// if we still have one fills with flipped bits, AND them here
if (ConciseSetUtils.isLiteral(w)) {
word &= w;
} else {
int flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
word &= flipBitLiteral;
i.advanceTo(itr.wordsWalked);
}
}
if (i.hasNext()) {
i.word = i.next();
maxChangedIndex = qIndex;
qIndex++;
} else {
removeElement(theQ, qIndex, qSize);
qSize--;
wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
}
}
// advance the set with the current literal forward and push result back to priority queue
addAndCompact(retVal, word);
currIndex++;
if (itr.hasNext()) {
itr.word = itr.next();
maxChangedIndex = Math.max(maxChangedIndex, 0);
} else {
removeElement(theQ, 0, qSize);
qSize--;
wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
}
} else if (ConciseSetUtils.isZeroSequence(word)) {
// extract a literal from the flip bits of the zero sequence
int flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(word);
// advance everything past the longest zero sequence
int qIndex = 1;
while (qIndex < qSize &&
theQ[qIndex].startIndex < itr.wordsWalked) {
WordIterator i = theQ[qIndex];
int w = i.getWord();
if (i.startIndex == itr.startIndex) {
// if a literal was created from a flip bit, AND it with other literals or literals from flip bits in the
// same position
if (ConciseSetUtils.isLiteral(w)) {
flipBitLiteral &= w;
} else if (ConciseSetUtils.isZeroSequence(w)) {
flipBitLiteral &= ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
} else {
assert ConciseSetUtils.isOneSequence(w);
flipBitLiteral &= ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
}
}
i.advanceTo(itr.wordsWalked);
if (i.hasNext()) {
i.word = i.next();
maxChangedIndex = qIndex;
qIndex++;
} else {
removeElement(theQ, qIndex, qSize);
qSize--;
wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
}
}
// advance longest zero literal forward and push result back to priority queue
// if a flip bit is still needed, put it in the correct position
int newWord = word & 0xC1FFFFFF;
if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
int position = Integer.numberOfTrailingZeros(flipBitLiteral) + 1;
newWord = (word & 0xC1FFFFFF) | (position << 25);
}
addAndCompact(retVal, newWord);
currIndex = itr.wordsWalked;
if (itr.hasNext()) {
itr.word = itr.next();
maxChangedIndex = Math.max(maxChangedIndex, 0);
} else {
removeElement(theQ, 0, qSize);
qSize--;
wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
}
} else {
assert ConciseSetUtils.isOneSequence(word);
int flipBitLiteral;
int qIndex = 1;
while (qIndex < qSize &&
theQ[qIndex].startIndex == itr.startIndex) {
// check if literal can be created flip bits of other one sequences
WordIterator i = theQ[qIndex];
int w = i.getWord();
flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
i.word = flipBitLiteral;
maxChangedIndex = qIndex;
qIndex++;
} else if (i.hasNext()) {
i.word = i.next();
maxChangedIndex = qIndex;
qIndex++;
} else {
removeElement(theQ, qIndex, qSize);
qSize--;
wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
}
}
// check if a literal needs to be created from the flipped bits of this sequence
flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(word);
if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
itr.word = flipBitLiteral;
maxChangedIndex = Math.max(maxChangedIndex, 0);
} else if (itr.hasNext()) {
itr.word = itr.next();
maxChangedIndex = Math.max(maxChangedIndex, 0);
} else {
removeElement(theQ, 0, qSize);
qSize--;
wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
}
}
if (maxChangedIndex >= 0) {
partialSort(theQ, maxChangedIndex, qSize, INTERSECTION_COMPARATOR);
}
}
// fill in any missing one sequences
if (currIndex < wordsWalkedAtSequenceEnd) {
addAndCompact(retVal, (ConciseSetUtils.SEQUENCE_BIT | (wordsWalkedAtSequenceEnd - currIndex - 1)));
}
if (retVal.isEmpty()) {
return new ImmutableConciseSet();
}
return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray()));
}