in src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile.java [234:328]
/**
 * Runs the accuracy profile for one stream length and prints the results through {@code job}.
 *
 * <p>Steps, in order:</p>
 * <ol>
 * <li>Print the header lines and the column labels.</li>
 * <li>Build the stream and compute its sorted values and sorted absolute ranks.</li>
 * <li>Choose {@code numPlotPoints} indices into the sorted stream, either evenly or
 * exponentially spaced, optionally restricted to a sub-range at one end of the stream.</li>
 * <li>Call {@code doTrial()} {@code numTrials} times.</li>
 * <li>Print one row per plot point with the error quantiles and rank bounds, accumulate the
 * "accuracy per byte" metrics, and reset the per-plot-point error sketches.</li>
 * <li>Print the summary metrics, the serialized size and the compactor detail.</li>
 * </ol>
 *
 * @param streamLength the length passed to the stream maker; also used as the length of the
 *     sorted arrays when selecting plot-point indices
 */
void doStreamLength(final int streamLength) {
  job.println(LS + "Stream Length: " + streamLength );
  job.println(LS + "param k: " + K );
  job.printfData(sFmt, (Object[])columnLabels);

  //build the stream
  stream = streamMaker.makeStream(streamLength, pattern, offset);

  //compute true ranks; the ltEq flag is handed straight to TrueRanks
  trueRanks = new TrueRanks(stream, ltEq);
  sortedStream = trueRanks.getSortedFloatStream();
  sortedAbsRanks = trueRanks.getSortedAbsRanks();

  //compute the true values used at the plot points
  int startIdx = 0;
  int endIdx = streamLength - 1;
  if (rankRange < 1.0) { //A substream of points focuses on a sub-range at one end.
    // Keep at least one item in the sub-range. If the product rounds to zero the range would
    // become [0, -1] (or [streamLength, streamLength - 1] for hra), which leads to
    // out-of-bounds indices into the sorted arrays below.
    final int subStreamLen = Math.max(1, (int)Math.round(rankRange * streamLength));
    startIdx = hra ? streamLength - subStreamLen : 0;
    endIdx = hra ? streamLength - 1 : subStreamLen - 1;
  }

  //generates PP indices in [startIdx, endIdx] inclusive, inclusive
  // PV 2020-01-07: using double so that there's enough precision even for large stream lengths
  final double[] temp = evenlySpaced
      ? evenlySpacedDoubles(startIdx, endIdx, numPlotPoints)
      : expSpaced(startIdx, endIdx, numPlotPoints, exponent, hra);

  sortedPPIndices = new int[numPlotPoints];
  sortedPPAbsRanks = new int[numPlotPoints];
  sortedPPValues = new float[numPlotPoints];
  for (int pp = 0; pp < numPlotPoints; pp++) {
    // round the fractional position to the nearest index of the sorted arrays
    final int idx = (int)Math.round(temp[pp]);
    sortedPPIndices[pp] = idx;
    sortedPPAbsRanks[pp] = sortedAbsRanks[idx];
    sortedPPValues[pp] = sortedStream[idx];
  }

  //Do numTrials for all plotpoints
  for (int t = 0; t < numTrials; t++) {
    doTrial();
  }

  // for special metrics for capturing accuracy per byte
  double sumRelStdDev = 0;
  int numRelStdDev = 0;
  double sumAddStdDev = 0;
  int numAddStdDev = 0;

  //at this point each of the errQSkArr sketches has a distribution of error from numTrials
  for (int pp = 0 ; pp < numPlotPoints; pp++) {
    final double v = sortedPPValues[pp];
    // NOTE(review): the value itself is divided by the stream length to obtain the rank;
    // presumably this relies on the stream values being 1..streamLength - confirm against
    // the stream maker.
    final double tr = v / streamLength; //the true rank

    // rank bounds reported by the sketch, expressed as offsets from the true rank
    final double rlb = sk.getRankLowerBound(tr, sd) - tr;
    final double rub = sk.getRankUpperBound(tr, sd) - tr;

    //for each of the numErrDistRanks distributions extract the sd Gaussian quantiles
    final double[] errQ = errQSkArr[pp].getQuantiles(gRanks);
    final int uErrCnt = (int)round(errHllSkArr[pp].getEstimate());

    //Plot the row.
    final double relPP = (double)(pp + 1) / numPlotPoints;
    job.printfData(fFmt, relPP, v, tr,
        errQ[0], errQ[1], errQ[2], errQ[3], errQ[4], errQ[5], errQ[6],
        rlb, rub, uErrCnt);

    // NOTE(review): errQ[4] is used as the std. dev. sample for both metrics; presumably it is
    // the +1 SD quantile of the error distribution - confirm against gRanks.
    // Additive metric: plot points strictly inside (0, 1) within metricsRankRange of one end.
    if (relPP > 0 && relPP < 1
        && (hra && relPP < metricsRankRange || !hra && relPP >= 1 - metricsRankRange)) {
      sumAddStdDev += errQ[4];
      numAddStdDev++;
    }
    // Relative metric: the opposite end; the error is scaled by the distance from the
    // accurate end (1 - relPP for hra, relPP otherwise).
    if (relPP > 0 && relPP < 1
        && (!hra && relPP < metricsRankRange || hra && relPP >= 1 - metricsRankRange)) {
      sumRelStdDev += errQ[4] / (hra ? 1 - relPP : relPP);
      numRelStdDev++;
    }

    errQSkArr[pp].reset(); //reset the errQSkArr for next streamLength
    errHllSkArr[pp].reset(); //reset the errHllSkArr for next streamLength
  }

  final int serBytes = sk.getSerializedSizeBytes();

  // special metrics for capturing accuracy per byte
  // If no plot point fell into a metric's window its count is zero and the floating-point
  // division prints NaN rather than throwing.
  final double avgRelStdDevTimesSize = serBytes * sumRelStdDev / numRelStdDev;
  final double avgAddStdDevTimesSize = serBytes * sumAddStdDev / numAddStdDev;
  job.println(LS + "Avg. relative std. dev. times size: " + avgRelStdDevTimesSize);
  job.println( "Avg. additive std. dev. times size: " + avgAddStdDevTimesSize);

  job.println(LS + "Serialization Bytes: " + serBytes);
  job.println(sk.viewCompactorDetail("%5.0f", false));
}