in adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java [580:771]
/**
 * Converts an Avro schema into an Arrow {@link Field}, recursing into nested schemas (union
 * members, array elements, map values and record fields).
 *
 * @param schema the Avro schema to convert
 * @param name the field name; when {@code null}, a default name derived from the resulting Arrow
 *     type is used. Outside legacy mode, everything up to and including the last {@code '.'} is
 *     dropped from the final name.
 * @param nullable nullability handed to {@code createFieldType}; it is not passed along for the
 *     literal union interpretation or for the Avro {@code NULL} type
 * @param config conversion settings (legacy mode, skipped field names, dictionary provider)
 * @param externalProps additional properties forwarded to {@code createFieldType}; may be {@code
 *     null}
 * @return the Arrow field, with {@code null} children when no child field was produced
 * @throws UnsupportedOperationException if the Avro type matches none of the handled cases
 */
private static Field avroSchemaToField(
    Schema schema,
    String name,
    boolean nullable,
    AvroToArrowConfig config,
    Map<String, String> externalProps) {
  final Schema.Type avroType = schema.getType();
  final LogicalType logical = schema.getLogicalType();
  final List<Field> childFields = new ArrayList<>();
  final FieldType arrowFieldType;

  switch (avroType) {
    case UNION: {
      final List<Schema> members = schema.getTypes();
      final boolean hasNullMember =
          members.stream().anyMatch(t -> t.getType() == Schema.Type.NULL);

      if (hasNullMember && members.size() == 2 && !config.isLegacyMode()) {
        // A plain (null | type) union collapses into a single nullable field of the other type.
        // Legacy mode never takes this shortcut and uses the literal interpretation below.
        final Schema first = members.get(0);
        final Schema valueSchema = first.getType() == Schema.Type.NULL ? members.get(1) : first;
        return avroSchemaToField(valueSchema, name, true, config, externalProps);
      }

      // Literal interpretation: one child per union member (a null member may or may not exist).
      for (Schema member : members) {
        // Children of a union vector carry the default name, hence the null name argument.
        childFields.add(avroSchemaToField(member, null, hasNullMember, config, null));
      }
      arrowFieldType =
          createFieldType(
              new ArrowType.Union(UnionMode.Sparse, null), schema, externalProps, config);
      break;
    }

    case ARRAY: {
      childFields.add(
          avroSchemaToField(schema.getElementType(), ListVector.DATA_VECTOR_NAME, config));
      arrowFieldType =
          createFieldType(nullable, new ArrowType.List(), schema, externalProps, config);
      break;
    }

    case MAP: {
      // Both the key field and the entries struct inside a MapVector must be non-nullable.
      final Field keyField =
          new Field(
              MapVector.KEY_NAME,
              new FieldType(/* nullable= */ false, new ArrowType.Utf8(), /* dictionary= */ null),
              /* children= */ null);
      final Field valueField =
          avroSchemaToField(schema.getValueType(), MapVector.VALUE_NAME, config);
      final Field entriesField =
          new Field(
              MapVector.DATA_VECTOR_NAME,
              new FieldType(false, new ArrowType.Struct(), /* dictionary= */ null),
              Arrays.asList(keyField, valueField));
      childFields.add(entriesField);
      arrowFieldType =
          createFieldType(
              nullable,
              new ArrowType.Map(/* keysSorted= */ false),
              schema,
              externalProps,
              config);
      break;
    }

    case RECORD: {
      final Set<String> skipped = config.getSkipFieldNames();
      for (Schema.Field avroField : schema.getFields()) {
        // Skip-list entries are matched against the parent-qualified name.
        final String qualifiedName = String.format("%s.%s", name, avroField.name());
        if (skipped.contains(qualifiedName)) {
          continue;
        }

        final Map<String, String> fieldProps = new HashMap<>();
        final String doc = avroField.doc();
        final Set<String> aliases = avroField.aliases();
        if (doc != null) {
          fieldProps.put("doc", doc);
        }
        // An empty alias set is only recorded in legacy mode.
        if (aliases != null && (!aliases.isEmpty() || config.isLegacyMode())) {
          fieldProps.put("aliases", convertAliases(aliases));
        }
        childFields.add(avroSchemaToField(avroField.schema(), qualifiedName, config, fieldProps));
      }
      arrowFieldType =
          createFieldType(nullable, new ArrowType.Struct(), schema, externalProps, config);
      break;
    }

    case ENUM: {
      // The dictionary id is the number of dictionaries currently known to the provider.
      final int dictionaryId = config.getProvider().getDictionaryIds().size();
      final ArrowType.Int indexType =
          DictionaryEncoder.getIndexType(schema.getEnumSymbols().size());
      arrowFieldType =
          createFieldType(
              nullable,
              indexType,
              schema,
              externalProps,
              new DictionaryEncoding(
                  dictionaryId, /* ordered= */ false, /* indexType= */ indexType),
              config);
      break;
    }

    case STRING:
      arrowFieldType =
          createFieldType(nullable, new ArrowType.Utf8(), schema, externalProps, config);
      break;

    case FIXED: {
      final ArrowType fixedType =
          logical instanceof LogicalTypes.Decimal
              ? createDecimalArrowType((LogicalTypes.Decimal) logical, schema)
              : new ArrowType.FixedSizeBinary(schema.getFixedSize());
      arrowFieldType = createFieldType(nullable, fixedType, schema, externalProps, config);
      break;
    }

    case INT: {
      final ArrowType intType;
      if (logical instanceof LogicalTypes.Date) {
        intType = new ArrowType.Date(DateUnit.DAY);
      } else if (logical instanceof LogicalTypes.TimeMillis) {
        intType = new ArrowType.Time(TimeUnit.MILLISECOND, 32);
      } else {
        intType = new ArrowType.Int(32, /* isSigned= */ true);
      }
      arrowFieldType = createFieldType(nullable, intType, schema, externalProps, config);
      break;
    }

    case BOOLEAN:
      arrowFieldType =
          createFieldType(nullable, new ArrowType.Bool(), schema, externalProps, config);
      break;

    case LONG: {
      final ArrowType longType;
      if (logical instanceof LogicalTypes.TimeMicros) {
        longType = new ArrowType.Time(TimeUnit.MICROSECOND, 64);
      } else if (logical instanceof LogicalTypes.TimestampMillis) {
        // timestamp-xxx: zone is "UTC", except in legacy mode where the value is treated as
        // local (null zone).
        longType =
            new ArrowType.Timestamp(
                TimeUnit.MILLISECOND, config.isLegacyMode() ? null : "UTC");
      } else if (logical instanceof LogicalTypes.TimestampMicros) {
        longType =
            new ArrowType.Timestamp(
                TimeUnit.MICROSECOND, config.isLegacyMode() ? null : "UTC");
      } else if (logical instanceof LogicalTypes.TimestampNanos) {
        longType =
            new ArrowType.Timestamp(TimeUnit.NANOSECOND, config.isLegacyMode() ? null : "UTC");
      } else if (logical instanceof LogicalTypes.LocalTimestampMillis
          && !config.isLegacyMode()) {
        // local-timestamp-xxx is not recognised in legacy mode; there it falls through to the
        // plain 64-bit integer below.
        longType = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null);
      } else if (logical instanceof LogicalTypes.LocalTimestampMicros
          && !config.isLegacyMode()) {
        longType = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null);
      } else if (logical instanceof LogicalTypes.LocalTimestampNanos
          && !config.isLegacyMode()) {
        longType = new ArrowType.Timestamp(TimeUnit.NANOSECOND, null);
      } else {
        longType = new ArrowType.Int(64, /* isSigned= */ true);
      }
      arrowFieldType = createFieldType(nullable, longType, schema, externalProps, config);
      break;
    }

    case FLOAT:
      arrowFieldType =
          createFieldType(
              nullable, new ArrowType.FloatingPoint(SINGLE), schema, externalProps, config);
      break;

    case DOUBLE:
      arrowFieldType =
          createFieldType(
              nullable, new ArrowType.FloatingPoint(DOUBLE), schema, externalProps, config);
      break;

    case BYTES: {
      final ArrowType bytesType =
          logical instanceof LogicalTypes.Decimal
              ? createDecimalArrowType((LogicalTypes.Decimal) logical, schema)
              : new ArrowType.Binary();
      arrowFieldType = createFieldType(nullable, bytesType, schema, externalProps, config);
      break;
    }

    case NULL:
      arrowFieldType = createFieldType(ArrowType.Null.INSTANCE, schema, externalProps, config);
      break;

    default:
      // Not expected to be reached.
      throw new UnsupportedOperationException();
  }

  // Fall back to the default name for the Arrow type when the caller supplied none.
  String fieldName = name != null ? name : getDefaultFieldName(arrowFieldType.getType());
  if (fieldName.contains(".") && !config.isLegacyMode()) {
    // Keep only the part after the last dot so the namespace is not part of the field name.
    fieldName = fieldName.substring(fieldName.lastIndexOf(".") + 1);
  }
  return new Field(fieldName, arrowFieldType, childFields.isEmpty() ? null : childFields);
}