in adapter/avro/src/main/java/org/apache/arrow/adapter/avro/ArrowToAvroUtils.java [259:354]
/**
 * Builds the Avro schema for a single Arrow field, dispatching on the field's Arrow type ID.
 *
 * <p>Primitive Arrow types map to Avro primitives, decimal / date / time / timestamp types map to
 * Avro logical types, and nested types (struct, list, fixed-size list, map) are delegated to the
 * record / array / map builders defined elsewhere in this class.
 *
 * @param builder Avro base type builder that receives the generated type
 * @param field Arrow field whose type is being converted
 * @param namespace Avro namespace to use for named types created here; may be {@code null}
 * @param <T> completion type returned by the builder
 * @return the builder's completion value after the type has been added
 * @throws IllegalArgumentException if the Arrow type ID is not handled by this method
 */
private static <T> T buildBaseTypeSchema(
    SchemaBuilder.BaseTypeBuilder<T> builder, Field field, String namespace) {
  ArrowType.ArrowTypeID typeID = field.getType().getTypeID();
  switch (typeID) {
    case Null:
      return builder.nullType();
    case Bool:
      return builder.booleanType();
    case Int: {
      ArrowType.Int intType = (ArrowType.Int) field.getType();
      // Anything wider than 32 bits, and unsigned 32-bit values, do not fit an Avro int
      if (intType.getBitWidth() > 32 || (intType.getBitWidth() == 32 && !intType.getIsSigned())) {
        return builder.longType();
      } else {
        return builder.intType();
      }
    }
    case FloatingPoint: {
      ArrowType.FloatingPoint floatType = (ArrowType.FloatingPoint) field.getType();
      // Only DOUBLE precision maps to double, every other precision is encoded as float
      if (floatType.getPrecision() == FloatingPointPrecision.DOUBLE) {
        return builder.doubleType();
      } else {
        return builder.floatType();
      }
    }
    case Utf8:
      return builder.stringType();
    case Binary:
      return builder.bytesType();
    case FixedSizeBinary: {
      ArrowType.FixedSizeBinary fixedType = (ArrowType.FixedSizeBinary) field.getType();
      String fixedTypeName = field.getName();
      int fixedTypeWidth = fixedType.getByteWidth();
      return builder.fixed(fixedTypeName).size(fixedTypeWidth);
    }
    case Decimal: {
      ArrowType.Decimal decimalType = (ArrowType.Decimal) field.getType();
      // Decimals are encoded as a fixed type of (bit width / 8) bytes with a decimal logical type.
      // Schema.createFixed takes (name, doc, space, size): the namespace must be passed as the
      // third argument, not as the doc string.
      Schema decimalFixedSchema =
          Schema.createFixed(field.getName(), null, namespace, decimalType.getBitWidth() / 8);
      return builder.type(
          LogicalTypes.decimal(decimalType.getPrecision(), decimalType.getScale())
              .addToSchema(decimalFixedSchema));
    }
    case Date:
      return builder.type(LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT)));
    case Time: {
      ArrowType.Time timeType = (ArrowType.Time) field.getType();
      if ((timeType.getUnit() == TimeUnit.SECOND || timeType.getUnit() == TimeUnit.MILLISECOND)) {
        // Second and millisecond time types are encoded as time-millis (INT)
        return builder.type(
            LogicalTypes.timeMillis().addToSchema(Schema.create(Schema.Type.INT)));
      } else {
        // All other time types (micro, nano) are encoded as time-micros (LONG)
        return builder.type(
            LogicalTypes.timeMicros().addToSchema(Schema.create(Schema.Type.LONG)));
      }
    }
    case Timestamp: {
      ArrowType.Timestamp timestampType = (ArrowType.Timestamp) field.getType();
      // The logical type is chosen by a helper defined elsewhere; the base type is always LONG
      LogicalType timestampLogicalType = timestampLogicalType(timestampType);
      return builder.type(timestampLogicalType.addToSchema(Schema.create(Schema.Type.LONG)));
    }
    case Struct: {
      // Child types of the struct are placed in a namespace nested under the struct's own name
      String childNamespace =
          namespace == null ? field.getName() : namespace + "." + field.getName();
      return buildRecordSchema(
          builder.record(field.getName()), field.getChildren(), childNamespace);
    }
    case List:
    case FixedSizeList: {
      // Arrow uses "$data$" as the field name for list items, that is not a valid Avro name
      Field itemField = field.getChildren().get(0);
      if (ListVector.DATA_VECTOR_NAME.equals(itemField.getName())) {
        // Rebuild the list field around an item field renamed to "item", keeping types and children
        Field safeItemField =
            new Field("item", itemField.getFieldType(), itemField.getChildren());
        Field safeListField =
            new Field(field.getName(), field.getFieldType(), List.of(safeItemField));
        return buildArraySchema(builder.array(), safeListField, namespace);
      } else {
        return buildArraySchema(builder.array(), field, namespace);
      }
    }
    case Map:
      return buildMapSchema(builder.map(), field, namespace);
    default:
      throw new IllegalArgumentException(
          "Element type not supported for Avro conversion: " + typeID.name());
  }
}