public List evaluate()

in src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchesTTestUDF.java [50:75]


  /**
   * Computes one p-value per value column by running a t-test between the corresponding
   * columns of two serialized ArrayOfDoubles sketches.
   *
   * @param serializedSketchA the first sketch in serialized form; may be null
   * @param serializedSketchB the second sketch in serialized form; may be null
   * @return a list holding one p-value for each value column, in column order; or null when
   *     either input is null or either sketch retains fewer than two entries
   * @throws IllegalArgumentException if the two sketches do not have the same number of values
   */
  public List<Double> evaluate(final BytesWritable serializedSketchA, final BytesWritable serializedSketchB) {
    // A missing input on either side produces a null result rather than an error.
    if (serializedSketchA == null) { return null; }
    if (serializedSketchB == null) { return null; }

    final ArrayOfDoublesSketch first =
        ArrayOfDoublesSketches.wrapSketch(BytesWritableHelper.wrapAsMemory(serializedSketchA));
    final ArrayOfDoublesSketch second =
        ArrayOfDoublesSketches.wrapSketch(BytesWritableHelper.wrapAsMemory(serializedSketchB));

    // Columns are compared pairwise, so both sketches must carry the same number of them.
    final int numValues = first.getNumValues();
    if (numValues != second.getNumValues()) {
      throw new IllegalArgumentException("Both sketches must have the same number of values");
    }

    // If either sketch retains fewer than 2 entries, the p-value can't be calculated
    final boolean enoughEntries = first.getRetainedEntries() >= 2 && second.getRetainedEntries() >= 2;
    if (!enoughEntries) {
      return null;
    }

    final SummaryStatistics[] statsA = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(first);
    final SummaryStatistics[] statsB = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(second);

    final TTest tTest = new TTest();
    final List<Double> result = new ArrayList<>(numValues);
    for (int column = 0; column < numValues; column++) {
      final double pValue = tTest.tTest(statsA[column], statsB[column]);
      result.add(pValue);
    }
    return result;
  }