in src/main/java/org/apache/datasketches/pig/theta/DataToSketch.java [528:568]
public Tuple exec(final Tuple inputTuple) throws IOException { //throws is in API
  // Purpose: merge everything found in the bag at field 0 of inputTuple into one
  // union and return the result as a compact, ordered sketch wrapped in a Tuple.
  //
  // Each tuple of that outer bag is dispatched on the type of its own field 0:
  //   null          -> the tuple is skipped
  //   DataBag       -> a non-empty bag is handed to updateUnion(...)
  //   DataByteArray -> the bytes are assumed to be a sketch and merged directly
  //   anything else -> IllegalArgumentException
  //
  // Returns the pre-built empty sketch tuple when extractBag(...) yields null.
  final Union merger = newUnion(this.myNomEntries_, this.myP_, this.mySeed_);

  final DataBag records = extractBag(inputTuple); //inputTuple.bag0
  if (records == null) {
    // No usable outer bag at field 0: abort and answer with the empty sketch.
    return this.myEmptyCompactOrderedSketchTuple_;
  }

  for (final Tuple record : records) {
    final Object field = extractFieldAtIndex(record, 0); //inputTuple.bag0.recordN.f0

    if (field == null) {
      continue; //nothing to merge from this record, move on to the next one
    }

    if (field instanceof DataBag) {
      // A bag here holds the system-bagged outputs of multiple mapper Initial
      // functions (the Intermediate stage was bypassed), so every tuple of the
      // nested bag is passed into the union. An empty nested bag is ignored.
      final DataBag nested = (DataBag) field;
      if (nested.size() != 0) {
        updateUnion(nested, merger);
      }
      continue;
    }

    if (field instanceof DataByteArray) {
      // A byte array is assumed to be a sketch coming from the system-bagged
      // outputs of multiple mapper Intermediate functions; merge it as-is.
      merger.union(Memory.wrap(((DataByteArray) field).get()));
      continue;
    }

    // Neither of the supported field types: this is not expected to happen.
    throw new IllegalArgumentException("dataTuple.Field0: Is not a DataByteArray: "
        + field.getClass().getName());
  }

  // Same getResult(true, null) call as before, wrapped into the output Tuple.
  return compactOrderedSketchToTuple(merger.getResult(true, null));
}