public Tuple binconf()

in datafu-pig/src/main/java/datafu/pig/stats/WilsonBinConf.java [89:130]


  /**
   * Computes the Wilson score confidence interval for a binomial proportion.
   *
   * <p>The significance level is taken from the {@code alpha} field and the result
   * is built with the {@code tupleFactory} field; both are declared outside this method.
   *
   * @param x number of successes; may be {@code null}
   * @param n number of observations; may be {@code null}
   * @return a two-field tuple {@code (lower, upper)}; {@code (0.0, 0.0)} when
   *         {@code n} is zero; {@code null} when either argument is {@code null}
   * @throws IllegalArgumentException if either argument is negative or if {@code x > n}
   * @throws IOException if the normal quantile computation fails with a {@code MathException}
   */
  public Tuple binconf(Long x, Long n) throws IOException
  {
    if (x == null || n == null) {
      return null;
    }

    // Unbox once so the arithmetic below works on primitives.
    final long successes = x;
    final long trials = n;

    if (successes < 0 || trials < 0) {
      throw new IllegalArgumentException("non-negative values expected");
    }
    if (successes > trials) {
      throw new IllegalArgumentException("invariant violation: number of successes > number of obs");
    }
    if (trials == 0) {
      return tupleFactory.newTuple(Arrays.asList(Double.valueOf(0), Double.valueOf(0)));
    }

    // Created only after the guards so the early exits above allocate nothing.
    NormalDistribution normalDist = new NormalDistributionImpl();

    try {
      // Negated quantile at alpha/2, i.e. the two-sided critical value for level alpha.
      double zcrit = -1.0 * normalDist.inverseCumulativeProbability(alpha/2);
      double z2 = zcrit * zcrit;
      double p = successes/(double)trials;

      // Wilson interval: (a -/+ b) / c.
      double a = p + z2/2/trials;
      double b = zcrit * Math.sqrt((p * (1 - p) + z2/4/trials)/trials);
      double c = (1 + z2/trials);

      double lower = (a - b) / c;
      double upper = (a + b) / c;

      // Boundary corrections: with exactly one success the lower bound, and with
      // exactly one failure the upper bound, is replaced by a log-based value.
      // For more info on wilson binomial confidence interval, see paper:
      // L.D. Brown, T.T. Cai and A. DasGupta, Interval estimation for a binomial proportion (with discussion),
      //   _Statistical Science,_*16*:101-133, 2001.
      // http://www-stat.wharton.upenn.edu/~tcai/paper/Binomial-StatSci.pdf
      if (successes == 1) {
        lower = -Math.log(1 - alpha)/trials;
      }
      if (successes == (trials - 1)) {
        upper = 1 + Math.log(1 - alpha)/trials;
      }

      // For x == 0 (resp. x == n) the exact lower (resp. upper) bound is 0 (resp. 1),
      // but a and b are computed along different floating-point paths, so the result
      // can land a few ulps outside [0, 1]. Keep the bounds inside the valid range
      // for a proportion.
      lower = Math.max(0.0, lower);
      upper = Math.min(1.0, upper);

      return tupleFactory.newTuple(Arrays.asList(lower, upper));
    }
    catch (MathException e) {
      throw new IOException("math error", e);
    }
  }