in parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java [110:177]
/**
 * Builds a converter that assembles one Avro record from the fields of a
 * Parquet group.
 *
 * <p>One child converter is created per Parquet field, in Parquet field order.
 * Avro fields that have no counterpart in the Parquet schema, are not of type
 * NULL, and resolve to a non-null default have that default stored in
 * {@code recordDefaults}.
 *
 * @param parent container handed to the superclass constructor
 * @param parquetSchema Parquet group (writer) schema whose fields are converted
 * @param avroSchema Avro record schema that the Parquet fields are matched against
 * @param model Avro data model; when {@code null}, {@code ReflectData.get()} is used
 */
public AvroRecordConverter(ParentValueContainer parent,
    GroupType parquetSchema, Schema avroSchema,
    GenericData model) {
  super(parent);
  this.avroSchema = avroSchema;
  this.model = (model == null ? ReflectData.get() : model);
  this.converters = new Converter[parquetSchema.getFieldCount()];

  // Position of every Avro field, keyed by name. Entries are removed as
  // Parquet fields are matched, so whatever remains after the main loop
  // exists only in the Avro schema.
  Map<String, Integer> avroFieldIndexes = new HashMap<String, Integer>();
  int avroFieldIndex = 0;
  for (Schema.Field field : avroSchema.getFields()) {
    avroFieldIndexes.put(field.name(), avroFieldIndex++);
  }

  // Test the resolved this.model rather than the raw argument: a null
  // argument falls back to ReflectData above and must take the same
  // reflection path as an explicitly supplied ReflectData instance.
  Class<?> recordClass = null;
  if (this.model instanceof ReflectData) {
    recordClass = getDatumClass(avroSchema, this.model);
  }

  Map<String, Class<?>> fields = getFieldsByName(recordClass, false);

  int parquetFieldIndex = 0;
  for (Type parquetField : parquetSchema.getFields()) {
    final Schema.Field avroField = getAvroField(parquetField.getName());
    Schema nonNullSchema = AvroSchemaConverter.getNonNull(avroField.schema());
    // remove() both fetches the Avro position and marks the field as matched
    final int finalAvroIndex = avroFieldIndexes.remove(avroField.name());

    // Routes each converted value into this record at the field's Avro position
    ParentValueContainer container = new ParentValueContainer() {
      @Override
      public void add(Object value) {
        AvroRecordConverter.this.set(avroField.name(), finalAvroIndex, value);
      }
    };

    Class<?> fieldClass = fields.get(avroField.name());
    Converter converter = newConverter(
        nonNullSchema, parquetField, this.model, fieldClass, container);

    // @Stringable doesn't affect the reflected schema; must be enforced here
    if (recordClass != null && converter instanceof FieldStringConverter) {
      try {
        // NOTE(review): getDeclaredField only sees fields declared directly on
        // recordClass; an inherited field lands in the catch below.
        Field field = recordClass.getDeclaredField(avroField.name());
        if (field.isAnnotationPresent(Stringable.class)) {
          converter = new FieldStringableConverter(container, field.getType());
        }
      } catch (NoSuchFieldException ignored) {
        // must not be stringable
      }
    }

    converters[parquetFieldIndex] = converter;
    parquetFieldIndex += 1;
  }

  // store defaults for any new Avro fields from avroSchema that are not in
  // the writer schema (parquetSchema)
  for (String fieldName : avroFieldIndexes.keySet()) {
    Schema.Field field = avroSchema.getField(fieldName);
    if (field.schema().getType() == Schema.Type.NULL) {
      continue; // skip null since Parquet does not write nulls
    }
    // Check for a declared default first so the model is only asked to
    // resolve defaults for fields that actually have one.
    if (field.defaultVal() == null) {
      continue; // field has no default
    }
    // use this.model because model may be null; resolve the value only once
    Object defaultValue = this.model.getDefaultValue(field);
    if (defaultValue == null) {
      continue; // default resolves to null, nothing to store
    }
    recordDefaults.put(field, defaultValue);
  }
}