in datafu-pig/src/main/java/datafu/pig/stats/StreamingQuantile.java [372:414]
public List<Double> getQuantiles()
{
List<Double> quantiles = new ArrayList<Double>();
quantiles.add(min);
if (buffer.get(0) != null) {
Collections.sort(buffer.get(0));
}
if (buffer.get(1) != null) {
Collections.sort(buffer.get(1));
}
int[] index = new int[buffer.size()];
long S = 0;
for (int i = 1; i <= numQuantiles - 2; i++) {
long targetS = (long) Math.ceil(i * (totalElements / (numQuantiles - 1.0)));
while (true) {
double smallest = max;
int minBufferId = -1;
for (int j = 0; j < buffer.size(); j++) {
if (buffer.get(j) != null && index[j] < buffer.get(j).size()) {
if (!(smallest < buffer.get(j).get(index[j]))) {
smallest = buffer.get(j).get(index[j]);
minBufferId = j;
}
}
}
long incrementS = minBufferId <= 1 ? 1L : (0x1L << (minBufferId - 1));
if (S + incrementS >= targetS) {
quantiles.add(smallest);
break;
} else {
index[minBufferId]++;
S += incrementS;
}
}
}
quantiles.add(max);
return quantiles;
}