in datafu-pig/src/main/java/datafu/pig/stats/WilsonBinConf.java [89:130]
/**
 * Computes the bounds of a Wilson score confidence interval for a binomial proportion.
 *
 * <p>The critical value is the negated standard-normal quantile at {@code alpha / 2},
 * where {@code alpha} is a field defined outside this method. Two boundary cases
 * replace the plain Wilson bounds:
 * <ul>
 *   <li>{@code x == 1}: the lower bound becomes {@code -log(1 - alpha) / n};</li>
 *   <li>{@code x == n - 1}: the upper bound becomes {@code 1 + log(1 - alpha) / n}.</li>
 * </ul>
 * For background on the Wilson interval see: L.D. Brown, T.T. Cai and A. DasGupta,
 * "Interval estimation for a binomial proportion (with discussion)",
 * Statistical Science 16:101-133, 2001.
 * http://www-stat.wharton.upenn.edu/~tcai/paper/Binomial-StatSci.pdf
 *
 * @param x number of successes
 * @param n number of observations
 * @return a two-field tuple {@code (lower, upper)}; {@code (0.0, 0.0)} when {@code n} is zero;
 *         {@code null} when either argument is {@code null}
 * @throws IllegalArgumentException if either argument is negative, or if {@code x > n}
 * @throws IOException if the normal quantile computation fails with a {@code MathException}
 */
public Tuple binconf(Long x, Long n) throws IOException
{
  if (x == null || n == null) {
    return null;
  }

  // Unbox once; every comparison and division below works on primitives.
  final long successes = x;
  final long trials = n;

  if (successes < 0 || trials < 0) {
    throw new IllegalArgumentException("non-negative values expected");
  }
  if (successes > trials) {
    throw new IllegalArgumentException("invariant violation: number of successes > number of obs");
  }
  if (trials == 0) {
    return tupleFactory.newTuple(Arrays.asList(0.0d, 0.0d));
  }

  // Only the quantile lookup can raise the checked MathException.
  double critical;
  try {
    NormalDistribution standardNormal = new NormalDistributionImpl();
    critical = -1.0 * standardNormal.inverseCumulativeProbability(alpha/2);
  }
  catch (MathException e) {
    throw new IOException("math error", e);
  }

  final double criticalSq = critical * critical;
  final double proportion = successes / (double) trials;

  // Wilson score interval: (centre -/+ spread) / scale.
  final double centre = proportion + criticalSq / 2 / trials;
  final double spread =
      critical * Math.sqrt((proportion * (1 - proportion) + criticalSq / 4 / trials) / trials);
  final double scale = (1 + criticalSq / trials);

  // Boundary replacements: exactly one success swaps out the lower bound,
  // exactly one failure swaps out the upper bound.
  final double lower = (successes == 1)
      ? -Math.log(1 - alpha) / trials
      : (centre - spread) / scale;
  final double upper = (successes == (trials - 1))
      ? 1 + Math.log(1 - alpha) / trials
      : (centre + spread) / scale;

  return tupleFactory.newTuple(Arrays.asList(lower, upper));
}