in datafu-pig/src/main/java/datafu/pig/stats/VAR.java [104:156]
/**
 * Converts a single raw input value into a 3-field tuple of
 * (value, value squared, count).
 *
 * <p>The first field of {@code input} must be a bag holding at most one tuple
 * whose first field is a {@code DataByteArray} containing the textual form of
 * a double. When the bag is empty, the field is null, or the text cannot be
 * parsed as a double, the result is {@code (null, null, 0L)}; otherwise it is
 * {@code (d, d*d, 1L)}.
 *
 * <p>NOTE(review): this looks like the per-record stage of an algebraic
 * variance computation - confirm against the enclosing class.
 *
 * @param input tuple whose first field is the bag described above
 * @return a new 3-field tuple as described above
 * @throws IOException an {@code ExecException} raised by the tuple accessors is
 *         propagated unchanged; any other failure (including a bag with more
 *         than one tuple) is wrapped in an {@code ExecException} with error
 *         code 2106
 */
public Tuple exec(Tuple input) throws IOException {
    Tuple t = mTupleFactory.newTuple(3);
    try {
        // input is a bag with one tuple containing
        // the column we are trying to get variance
        DataBag bg = (DataBag) input.get(0);
        Iterator<Tuple> iter = bg.iterator();
        DataByteArray dba = null;
        if (iter.hasNext()) {
            dba = (DataByteArray) iter.next().get(0);
        }
        if (iter.hasNext()) {
            throw new RuntimeException("Expected only one tuple in bag");
        }

        Double d = null;
        if (dba != null) {
            try {
                d = Double.valueOf(dba.toString());
            } catch (NumberFormatException ignored) {
                // Invalid numeric text is an expected data condition: treat the
                // value as null instead of dumping a stack trace per bad record.
                d = null;
            }
        }

        if (d == null) {
            // Missing or unparsable value: contributes nothing, count of zero.
            t.set(0, null);
            t.set(1, null);
            t.set(2, 0L);
        } else {
            t.set(0, d);
            t.set(1, d * d);
            t.set(2, 1L);
        }
        return t;
    } catch (ExecException ee) {
        // Rethrow as-is so it is not re-wrapped by the generic handler below.
        throw ee;
    } catch (Exception e) {
        int errCode = 2106;
        String msg = "Error while computing variance in " + this.getClass().getSimpleName();
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}