public Tuple exec()

in src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimates.java [42:79]


  /**
   * Computes one t-test p-value per metric from two serialized ArrayOfDoubles sketches.
   *
   * <p>Each sketch is converted to per-metric {@code SummaryStatistics}, and the statistics for
   * metric {@code i} of both sketches are passed to {@code TTest.tTest} to obtain the p-value
   * stored at position {@code i} of the result.
   *
   * @param input tuple expected to hold exactly two fields, each a {@code DataByteArray}
   *     containing a serialized sketch
   * @return a tuple with one p-value per metric, or {@code null} when the input is null, does
   *     not have exactly two fields, has a null field, or either sketch retains fewer than two
   *     entries
   * @throws IOException if reading a field from the input tuple fails
   * @throws IllegalArgumentException if the two sketches hold a different number of values
   */
  public Tuple exec(final Tuple input) throws IOException {
    if (input == null || input.size() != 2) {
      return null;
    }

    // Get the two serialized sketches
    final DataByteArray dbaA = (DataByteArray) input.get(0);
    final DataByteArray dbaB = (DataByteArray) input.get(1);

    // A null field cannot be wrapped as a sketch; report "no result" the same way the other
    // unusable-input cases do instead of failing with a NullPointerException.
    if (dbaA == null || dbaB == null) {
      return null;
    }

    final ArrayOfDoublesSketch sketchA = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dbaA.get()));
    final ArrayOfDoublesSketch sketchB = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dbaB.get()));

    // Metrics are compared position by position, so both sketches must carry the same count
    final int numMetrics = sketchA.getNumValues();
    if (numMetrics != sketchB.getNumValues()) {
      throw new IllegalArgumentException("Both sketches must have the same number of values");
    }

    // If the sketches contain fewer than 2 values, the p-value can't be calculated
    if (sketchA.getRetainedEntries() < 2 || sketchB.getRetainedEntries() < 2) {
      return null;
    }

    // Get the statistical summary from each sketch (one SummaryStatistics per metric)
    final SummaryStatistics[] summariesA = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(sketchA);
    final SummaryStatistics[] summariesB = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(sketchB);

    // Calculate the p-values, one per metric
    final TTest tTest = new TTest();
    final Tuple pValues = TupleFactory.getInstance().newTuple(numMetrics);
    for (int i = 0; i < numMetrics; i++) {
      pValues.set(i, tTest.tTest(summariesA[i], summariesB[i]));
    }

    return pValues;
  }