in datafu-pig/src/main/java/datafu/pig/stats/Quantile.java [142:181]
/**
 * Computes one value per configured quantile from the tuples of a bag.
 *
 * <p>For every entry of {@code this.quantiles}, {@code getIndexes} supplies a pair of
 * 1-based positions. The bag is walked once in iteration order, no further than the
 * largest required position, and the first field of each tuple sitting at a required
 * position is captured. Each output field is the mean of the two values captured for
 * the corresponding quantile.
 *
 * <p>NOTE(review): positions are only meaningful if the bag iterates in sorted order;
 * nothing in this method sorts or verifies that -- confirm against the caller.
 *
 * @param bag tuples whose first field holds the numeric value to sample
 * @return a tuple holding one double per entry of {@code this.quantiles}, in the same
 *         order; {@code null} when {@code bag} is {@code null} or empty
 * @throws IOException declared for the tuple accessors used here
 * @throws IllegalStateException if the value at a required position is not a
 *         {@link Number} (a {@code null} value included)
 */
public Tuple call(DataBag bag) throws IOException
{
  if (bag == null) {
    return null;
  }
  if (bag.size() == 0) {
    return null;
  }

  // Position -> captured value; a null value marks "required but not read yet".
  Map<Long, Double> sampled = new HashMap<Long, Double>();
  final long total = bag.size();
  long lastNeeded = 1;

  // First pass over the quantiles: register every position that must be read.
  for (double q : this.quantiles) {
    Pair<Long, Long> bounds = getIndexes(q, total);
    sampled.put(bounds.first, null);
    sampled.put(bounds.second, null);
    if (bounds.second > lastNeeded) {
      lastNeeded = bounds.second;
    }
  }

  // Single pass over the bag; positions are counted from 1.
  long position = 0;
  for (Tuple entry : bag) {
    position++;
    if (position > lastNeeded) {
      // Everything that is needed has already been visited.
      break;
    }
    if (!sampled.containsKey(position)) {
      continue;
    }
    Object raw = entry.get(0);
    if (raw instanceof Number) {
      sampled.put(position, ((Number) raw).doubleValue());
    } else {
      throw new IllegalStateException("bag must have numerical values (and be non-null)");
    }
  }

  // Second pass over the quantiles: average the two captured values for each.
  Tuple result = TupleFactory.getInstance().newTuple(this.quantiles.size());
  int slot = 0;
  for (double q : this.quantiles) {
    Pair<Long, Long> bounds = getIndexes(q, total);
    double lower = sampled.get(bounds.first);
    double upper = sampled.get(bounds.second);
    double quantile = (lower + upper) / 2;
    result.set(slot++, quantile);
  }
  return result;
}