in datafu-pig/src/main/java/datafu/pig/stats/entropy/EmpiricalCountEntropy.java [379:426]
/**
 * Validates the input schema and declares the output schema of this UDF.
 *
 * <p>The input's first field must be a BAG whose first field is a TUPLE, and
 * that tuple must carry exactly one field of type INTEGER or LONG. On success
 * the result is a one-field schema of type DOUBLE whose name is obtained from
 * {@code getSchemaName} using the lower-cased runtime class name.
 *
 * @param input the schema of the arguments passed to this UDF
 * @return a schema containing a single DOUBLE field
 * @throws RuntimeException if the input schema is missing or does not have the
 *         expected shape, or wrapping any {@code FrontendException} raised
 *         while inspecting it
 */
public Schema outputSchema(Schema input)
{
  // Fail with a descriptive message rather than a bare NullPointerException.
  if (input == null) {
    throw new RuntimeException("Expected a BAG as input, but the input schema is null");
  }

  try {
    Schema.FieldSchema inputFieldSchema = input.getField(0);
    if (inputFieldSchema.type != DataType.BAG)
    {
      throw new RuntimeException("Expected a BAG as input");
    }

    // The bag's inner schema is dereferenced below, so guard it the same way
    // the tuple schema is guarded further down.
    Schema inputBagSchema = inputFieldSchema.schema;
    if (inputBagSchema == null) {
      throw new RuntimeException("The input bag has no schema");
    }

    Schema.FieldSchema bagElementSchema = inputBagSchema.getField(0);
    if (bagElementSchema.type != DataType.TUPLE)
    {
      throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                                               DataType.findTypeName(bagElementSchema.type)));
    }

    Schema tupleSchema = bagElementSchema.schema;
    if (tupleSchema == null) {
      throw new RuntimeException("The tuple of input bag has no schema");
    }

    List<Schema.FieldSchema> fieldSchemaList = tupleSchema.getFields();
    if (fieldSchemaList == null || fieldSchemaList.size() != 1) {
      throw new RuntimeException("The field schema of the input tuple is null or its size is not 1");
    }

    // Only integral count types are accepted.
    byte countType = fieldSchemaList.get(0).type;
    if (countType != DataType.INTEGER && countType != DataType.LONG)
    {
      String[] expectedTypes = new String[] {DataType.findTypeName(DataType.INTEGER),
                                             DataType.findTypeName(DataType.LONG)};
      throw new RuntimeException("Expect the type of the input tuple to be of (" +
                                 java.util.Arrays.toString(expectedTypes) + "), but instead found " +
                                 DataType.findTypeName(countType));
    }

    // Locale.ROOT keeps the generated field name independent of the JVM's
    // default locale (e.g. Turkish dotless-i case mapping).
    String baseName = this.getClass().getName().toLowerCase(java.util.Locale.ROOT);
    return new Schema(new Schema.FieldSchema(getSchemaName(baseName, input),
                                             DataType.DOUBLE));
  } catch (FrontendException e) {
    throw new RuntimeException(e);
  }
}