in src/main/java/org/apache/datasketches/tuple/Union.java [154:215]
public CompactSketch<S> getResult(final boolean reset) {
  // Builds the union result as a CompactSketch from the internal gadget (qsk_) and unionThetaLong_.
  // When 'reset' is true, reset() is invoked after the result has been built and before it is returned.
  final CompactSketch<S> result;
  if (empty_
      || (unionThetaLong_ >= qsk_.thetaLong_ && qsk_.getRetainedEntries() <= qsk_.getNominalEntries())) {
    // Either the union is empty, or the gadget can be compacted directly:
    //  - unionThetaLong_ >= qsk_.thetaLong_ : unionThetaLong_ can be ignored, so no rebuild is needed.
    //  - retained entries <= nominal entries : there is no need to pull back to k.
    result = qsk_.compact();
  } else {
    // The effective theta is the smaller of the union theta and the gadget theta.
    final long thetaCap = min(unionThetaLong_, qsk_.thetaLong_);

    // Pass 1: count the hashes below thetaCap. Counting is required because Alpha can have dirty values.
    int validCount = 0;
    for (final TupleSketchIterator<S> counter = qsk_.iterator(); counter.next(); ) {
      if (counter.getHash() < thetaCap) { validCount++; }
    }

    if (validCount != 0) {
      // Here we know: empty == false, count > 0.
      final int keepCount;
      final long thetaOut;
      if (validCount <= qsk_.getNominalEntries()) {
        // Everything below thetaCap fits; keep it all and keep thetaCap as theta.
        keepCount = validCount;
        thetaOut = thetaCap;
      } else {
        // Too many valid hashes: trim them and derive a new, smaller theta.
        // Pass 2: gather the valid hashes into a scratch array; quick select will destroy its order.
        final long[] scratch = new long[validCount];
        int fill = 0;
        for (final TupleSketchIterator<S> gatherer = qsk_.iterator(); gatherer.next(); ) {
          final long candidate = gatherer.getHash();
          if (candidate < thetaCap) { scratch[fill++] = candidate; }
        }
        keepCount = qsk_.getNominalEntries();
        thetaOut = QuickSelect.select(scratch, 0, validCount - 1, keepCount);
      }

      // Pass 3: fill the output arrays, keeping each hash in the same slot as the copy of its summary.
      final long[] outHashes = new long[keepCount];
      final S[] outSummaries = Util.newSummaryArray(qsk_.getSummaryTable(), keepCount);
      int out = 0;
      for (final TupleSketchIterator<S> copier = qsk_.iterator(); copier.next(); ) {
        final long candidate = copier.getHash();
        if (candidate >= thetaOut) { continue; } // not a qualifying hash
        outHashes[out] = candidate;
        outSummaries[out] = (S) copier.getSummary().copy();
        out++;
      }
      result = new CompactSketch<>(outHashes, outSummaries, thetaOut, empty_);
    } else {
      // No valid hashes while empty == false means Theta < 1.0.
      // This is therefore a degenerate sketch: theta < 1.0, count = 0, empty = false.
      result = new CompactSketch<>(null, null, thetaCap, empty_);
    }
  }
  if (reset) { reset(); }
  return result;
}