in src/main/java/org/apache/datasketches/pig/sampling/ReservoirSampling.java [124:149]
public Schema outputSchema(final Schema input) {
  // Describes the output of this UDF as a single tuple with three fields:
  //   N_ALIAS (long), K_ALIAS (int) and SAMPLES_ALIAS (a bag carrying the input record schema).
  // Returns null when no usable input schema is supplied (null or empty).
  // A FrontendException raised while inspecting or building schemas is rethrown as a
  // RuntimeException with the original exception preserved as its cause.
  if ((input == null) || (input.size() <= 0)) {
    return null;
  }

  try {
    Schema source = input;

    // if the input is a single bag, step one level down so that the samples bag
    // reuses the bag's inner schema instead of nesting a bag inside a bag
    if (source.size() == 1) {
      final Schema.FieldSchema onlyField = source.getField(0);
      if (onlyField.type == DataType.BAG) {
        source = onlyField.schema;
      }
    }

    final Schema recordSchema = new Schema();
    recordSchema.add(new Schema.FieldSchema(N_ALIAS, DataType.LONG));
    recordSchema.add(new Schema.FieldSchema(K_ALIAS, DataType.INTEGER));
    // the samples are exposed as a bag whose contents follow the source schema
    recordSchema.add(new Schema.FieldSchema(SAMPLES_ALIAS, source, DataType.BAG));

    // Lower-case with Locale.ROOT: the result becomes part of a schema alias, so it must not
    // vary with the JVM default locale (e.g. Turkish maps 'I' to a dotless 'ı').
    final String udfName = getClass().getName().toLowerCase(java.util.Locale.ROOT);
    final String tupleAlias = getSchemaName(udfName, source);

    return new Schema(new Schema.FieldSchema(tupleAlias, recordSchema, DataType.TUPLE));
  }
  catch (final FrontendException e) {
    throw new RuntimeException(e);
  }
}