void doStreamLength()

in src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile.java [234:328]


  /**
   * Runs the accuracy characterization for one stream length and prints the resulting table.
   *
   * <p>In order, this method: prints the header lines; builds the stream and its true ranks;
   * selects the plot-point indices (evenly or exponentially spaced) inside the configured rank
   * range; executes {@code numTrials} trials; and finally, for every plot point, prints a row of
   * error quantiles and rank bounds, accumulates the accuracy-per-byte metrics and resets the
   * per-plot-point error sketches so they can be reused for the next stream length.
   *
   * @param streamLength the length handed to the stream maker; also used as the denominator
   *     when normalizing values to ranks
   */
  void doStreamLength(final int streamLength) {
    // Header for this stream length.
    job.println(LS + "Stream Length: " + streamLength );
    job.println(LS + "param k: " + K );
    job.printfData(sFmt, (Object[])columnLabels);

    // Build the stream and derive its true ranks; ltEq is forwarded as the rank criterion flag.
    stream = streamMaker.makeStream(streamLength, pattern, offset);
    trueRanks = new TrueRanks(stream, ltEq);
    sortedStream = trueRanks.getSortedFloatStream();
    sortedAbsRanks = trueRanks.getSortedAbsRanks();

    // Index window [lowIdx, highIdx] (both inclusive) from which plot points are drawn.
    // By default it spans the whole stream; when rankRange < 1.0 it shrinks to a sub-range
    // anchored at the top end (hra) or at the bottom end (!hra).
    int lowIdx = 0;
    int highIdx = streamLength - 1;
    if (rankRange < 1.0) {
      final int subLen = (int) Math.round(rankRange * streamLength);
      if (hra) {
        lowIdx = streamLength - subLen;
      } else {
        highIdx = subLen - 1;
      }
    }

    // Plot-point positions are produced as doubles so that precision is retained for large
    // stream lengths; they are rounded to array indices below.
    final double[] ppPositions;
    if (evenlySpaced) {
      ppPositions = evenlySpacedDoubles(lowIdx, highIdx, numPlotPoints);
    } else {
      ppPositions = expSpaced(lowIdx, highIdx, numPlotPoints, exponent, hra);
    }

    sortedPPIndices = new int[numPlotPoints];
    sortedPPAbsRanks = new int[numPlotPoints];
    sortedPPValues = new float[numPlotPoints];

    // Record, for each plot point, its index in the sorted stream plus the rank and value there.
    for (int i = 0; i < numPlotPoints; i++) {
      final int nearestIdx = (int) Math.round(ppPositions[i]);
      sortedPPIndices[i] = nearestIdx;
      sortedPPAbsRanks[i] = sortedAbsRanks[nearestIdx];
      sortedPPValues[i] = sortedStream[nearestIdx];
    }

    // Run every trial; each one is expected to feed the per-plot-point error sketches.
    for (int trial = 0; trial < numTrials; trial++) {
      doTrial();
    }

    // Accumulators for the accuracy-per-byte metrics.
    double relStdDevSum = 0;
    int relStdDevCount = 0;
    double addStdDevSum = 0;
    int addStdDevCount = 0;

    // Each errQSkArr entry now holds the error distribution gathered over all trials.
    for (int i = 0; i < numPlotPoints; i++) {
      final double value = sortedPPValues[i];
      // NOTE(review): the true rank is taken as value / streamLength rather than from
      // sortedPPAbsRanks; this presumes stream values map directly onto ranks - confirm.
      final double trueRank = value / streamLength;
      final double lowerBoundErr = sk.getRankLowerBound(trueRank, sd) - trueRank;
      final double upperBoundErr = sk.getRankUpperBound(trueRank, sd) - trueRank;

      // Error quantiles at the ranks listed in gRanks, and the estimated distinct-error count.
      final double[] quantiles = errQSkArr[i].getQuantiles(gRanks);
      final int uniqueErrCount = (int) round(errHllSkArr[i].getEstimate());

      // Emit the table row; relPos is the 1-based plot-point position normalized to (0, 1].
      final double relPos = (double) (i + 1) / numPlotPoints;
      job.printfData(fFmt, relPos, value, trueRank,
          quantiles[0], quantiles[1], quantiles[2], quantiles[3],
          quantiles[4], quantiles[5], quantiles[6],
          lowerBoundErr, upperBoundErr, uniqueErrCount);

      // Only strictly interior positions contribute to the metrics. Which end of the rank
      // axis feeds the additive vs. the relative metric is flipped by hra.
      final boolean interior = relPos > 0 && relPos < 1;
      final boolean belowCutoff = relPos < metricsRankRange;
      final boolean aboveCutoff = relPos >= 1 - metricsRankRange;

      if (interior && (hra ? belowCutoff : aboveCutoff)) {
        addStdDevSum += quantiles[4];
        addStdDevCount++;
      }
      if (interior && (hra ? aboveCutoff : belowCutoff)) {
        // Relative error: scale by the distance from the accurate end of the rank axis.
        relStdDevSum += quantiles[4] / (hra ? 1 - relPos : relPos);
        relStdDevCount++;
      }

      // Clear both per-plot-point sketches so the next stream length starts fresh.
      errQSkArr[i].reset();
      errHllSkArr[i].reset();
    }

    final int sizeBytes = sk.getSerializedSizeBytes();

    // Accuracy-per-byte metrics: serialized size times the mean of the collected errors.
    // NOTE(review): if no plot point fell in a band the count is 0 and the result is NaN.
    final double relMetric = sizeBytes * relStdDevSum / relStdDevCount;
    final double addMetric = sizeBytes * addStdDevSum / addStdDevCount;
    job.println(LS + "Avg. relative std. dev. times size: " + relMetric);
    job.println(     "Avg. additive std. dev. times size: " + addMetric);

    job.println(LS + "Serialization Bytes: " + sizeBytes);
    job.println(sk.viewCompactorDetail("%5.0f", false));
  }