in datafu-pig/src/main/java/datafu/pig/text/opennlp/POSTag.java [125:176]
// Validates the schema of the UDF's input and builds the schema of its output.
//
// Expected input: field 0 is a BAG whose first field is a TUPLE holding exactly one
// CHARARRAY field (the token data).
// Output: a single BAG field whose schema lists token (CHARARRAY), tag (CHARARRAY)
// and probability (DOUBLE). The field name comes from getSchemaName(), called with
// the lower-cased class name and the input schema.
//
// Returns null when the bag's inner schema, or the inner schema of its tuple, is not
// declared, since nothing can be validated in that case.
// Throws RuntimeException when the input does not have the expected shape; a
// FrontendException raised while reading the schema is wrapped in a RuntimeException.
public Schema outputSchema(Schema input)
{
    try
    {
        Schema.FieldSchema inputFieldSchema = input.getField(0);

        if (inputFieldSchema.type != DataType.BAG)
        {
            throw new RuntimeException("Expected a BAG as input");
        }

        Schema inputBagSchema = inputFieldSchema.schema;

        // A bag without a declared inner schema cannot be checked any further.
        if (inputBagSchema == null)
        {
            return null;
        }

        Schema.FieldSchema bagElementSchema = inputBagSchema.getField(0);

        if (bagElementSchema.type != DataType.TUPLE)
        {
            throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                                                     DataType.findTypeName(bagElementSchema.type)));
        }

        Schema inputTupleSchema = bagElementSchema.schema;

        // Same treatment as the undeclared bag schema above: a tuple without declared
        // fields has a null schema, and calling size() on it would raise a
        // NullPointerException that the catch block below does not handle.
        if (inputTupleSchema == null)
        {
            return null;
        }

        if (inputTupleSchema.size() != 1)
        {
            throw new RuntimeException("Expected one field for the token data");
        }

        Schema.FieldSchema tokenFieldSchema = inputTupleSchema.getField(0);

        if (tokenFieldSchema.type != DataType.CHARARRAY)
        {
            throw new RuntimeException(String.format("Expected source to be a CHARARRAY, but instead found %s",
                                                     DataType.findTypeName(tokenFieldSchema.type)));
        }

        // Fields describing each element of the returned bag.
        Schema tupleSchema = new Schema();
        tupleSchema.add(new Schema.FieldSchema("token", DataType.CHARARRAY));
        tupleSchema.add(new Schema.FieldSchema("tag", DataType.CHARARRAY));
        tupleSchema.add(new Schema.FieldSchema("probability", DataType.DOUBLE));

        String outputFieldName = getSchemaName(this.getClass().getName().toLowerCase(), input);

        return new Schema(new Schema.FieldSchema(outputFieldName, tupleSchema, DataType.BAG));
    }
    catch (FrontendException e)
    {
        // Keep the original exception as the cause so the schema problem stays visible.
        throw new RuntimeException(e);
    }
}