in src/main/java/org/apache/datasketches/theta/UnionImpl.java [300:357]
public void union(final Sketch sketchIn) {
  //Folds sketchIn into this union: tightens the union theta and offers every incoming hash
  // that lies below that theta to the internal gadget.

  //UNION Empty Rule: a null or empty input is interpreted as
  // (Theta = 1.0, count = 0, empty = T), so no state is touched.
  if ((sketchIn == null) || sketchIn.isEmpty()) { return; }

  //From here on sketchIn is non-null and not empty; its seed hash is checked against ours.
  ThetaUtil.checkSeedHashes(expectedSeedHash_, sketchIn.getSeedHash());

  //A single-item sketch contributes only the hash held in slot 0 of its cache.
  //Note that this path returns before the union theta / empty state below is updated.
  if (sketchIn instanceof SingleItemSketch) {
    final long onlyHash = sketchIn.getCache()[0];
    gadget_.hashUpdate(onlyHash);
    return;
  }
  Sketch.checkSketchAndMemoryFlags(sketchIn);

  //Theta rule: keep the smallest of the current union theta, the incoming theta
  // and the gadget theta.
  final long thetaAfterInput = min(unionThetaLong_, sketchIn.getThetaLong());
  unionThetaLong_ = min(thetaAfterInput, gadget_.getThetaLong());
  unionEmpty_ = false;

  final int numEntriesIn = sketchIn.getRetainedEntries(true);
  if (numEntriesIn > 0) {
    //Ordered implies compact here, which is what permits the "early stop" below.
    final boolean orderedCompact = sketchIn.isOrdered() && (sketchIn instanceof CompactSketch);

    if (!orderedCompact) {
      //Either a not-ordered compact sketch or Hash Table form. A HT may have dirty values.
      final long[] hashesIn = sketchIn.getCache(); //if off-heap this will be a copy
      int accepted = 0; //bounds the scan; ensures against invalid state in the incoming sketch
      for (final long hash : hashesIn) {
        if (accepted >= numEntriesIn) { break; }
        if ((hash > 0L) && (hash < unionThetaLong_)) { //everything else is rejected as dirty
          gadget_.hashUpdate(hash); //backdoor update, hash function is bypassed
          accepted++;
        }
      }
    }
    else if (sketchIn.hasMemory()) {
      //Ordered compact sketch with Memory: read each hash straight out of the Memory.
      final Memory srcMem = ((CompactSketch) sketchIn).getMemory();
      final int preLongs = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; //lower 6 bits only
      for (int idx = 0; idx < numEntriesIn; idx++) {
        //Entry idx is read at long index (preLongs + idx); << 3 converts longs to bytes.
        final long hash = srcMem.getLong((preLongs + idx) << 3);
        if (hash >= unionThetaLong_) { break; } // "early stop"
        gadget_.hashUpdate(hash); //backdoor update, hash function is bypassed
      }
    }
    else {
      //Ordered compact sketch on the Java Heap or otherwise exposing its array.
      final long[] hashesIn = sketchIn.getCache(); //not a copy!
      for (int idx = 0; idx < numEntriesIn; idx++) {
        final long hash = hashesIn[idx];
        if (hash >= unionThetaLong_) { break; } // "early stop"
        gadget_.hashUpdate(hash); //backdoor update, hash function is bypassed
      }
    }
  }

  //Theta rule with gadget: the gadget's theta is read again after the updates above.
  unionThetaLong_ = min(unionThetaLong_, gadget_.getThetaLong());

  //When the gadget has Memory, write the union theta into it and clear its empty flag.
  if (gadget_.hasMemory()) {
    final WritableMemory gadgetMem = (WritableMemory) gadget_.getMemory();
    PreambleUtil.insertUnionThetaLong(gadgetMem, unionThetaLong_);
    PreambleUtil.clearEmpty(gadgetMem);
  }
}