in parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java [143:386]
private TypeMapping fromArrow(final Field field, final String fieldName) {
final List<Field> children = field.getChildren();
return field.getType().accept(new ArrowTypeVisitor<TypeMapping>() {
/**
 * Maps the Arrow null type onto a BINARY column without a logical type annotation.
 *
 * @param type the Arrow null type (not inspected)
 * @return the primitive mapping for the current field
 */
@Override
public TypeMapping visit(Null type) {
  // TODO(PARQUET-757): null original type
  final TypeMapping nullMapping = primitive(BINARY);
  return nullMapping;
}
/**
 * Converts an Arrow struct into an optional Parquet group named after the field. The group
 * members are the mappings produced by converting the field's children.
 *
 * @param type the Arrow struct type (not inspected; the children come from the field)
 * @return the struct mapping for the current field
 */
@Override
public TypeMapping visit(Struct type) {
  final List<TypeMapping> memberMappings = fromArrow(children);
  final GroupType structGroup =
      addToBuilder(memberMappings, Types.buildGroup(OPTIONAL)).named(fieldName);
  return new StructTypeMapping(field, structGroup, memberMappings);
}
/**
 * Handles the regular Arrow list type by delegating to the shared list conversion.
 *
 * @param type the Arrow list type (not inspected)
 * @return the list mapping for the current field
 */
@Override
public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
  final ListTypeMapping listMapping = createListTypeMapping();
  return listMapping;
}
/**
 * Handles the Arrow large-list type by delegating to the shared list conversion.
 *
 * @param largeList the Arrow large-list type (not inspected)
 * @return the list mapping for the current field
 */
@Override
public TypeMapping visit(ArrowType.LargeList largeList) {
  final ListTypeMapping largeListMapping = createListTypeMapping();
  return largeListMapping;
}
/**
 * Handles the Arrow fixed-size-list type by delegating to the shared list conversion; the
 * fixed size itself is not consulted here.
 *
 * @param type the Arrow fixed-size-list type (not inspected)
 * @return the list mapping for the current field
 */
@Override
public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList type) {
  final ListTypeMapping fixedSizeListMapping = createListTypeMapping();
  return fixedSizeListMapping;
}
/**
 * Handles the Arrow list-view type by delegating to the shared list conversion.
 *
 * @param type the Arrow list-view type (not inspected)
 * @return the list mapping for the current field
 */
@Override
public TypeMapping visit(ArrowType.ListView type) {
  final ListTypeMapping listViewMapping = createListTypeMapping();
  return listViewMapping;
}
/**
 * Shared conversion for every Arrow list flavour. The single child is converted under the
 * name "element" and wrapped in an optional Parquet list named after the field.
 *
 * @return the list mapping for the current field
 * @throws IllegalArgumentException if the field does not have exactly one child
 */
private ListTypeMapping createListTypeMapping() {
  final int childCount = children.size();
  if (childCount != 1) {
    throw new IllegalArgumentException("list fields must have exactly one child: " + field);
  }
  final Field elementField = children.get(0);
  final TypeMapping elementMapping = fromArrow(elementField, "element");
  final GroupType listGroup =
      Types.optionalList().element(elementMapping.getParquetType()).named(fieldName);
  return new ListTypeMapping(field, new List3Levels(listGroup), elementMapping);
}
/**
 * Converts an Arrow union into an optional Parquet group named after the field. The group
 * members are the mappings produced by converting the field's children.
 *
 * @param type the Arrow union type (not inspected; the children come from the field)
 * @return the union mapping for the current field
 */
@Override
public TypeMapping visit(Union type) {
  // TODO(PARQUET-756): add Union OriginalType
  final List<TypeMapping> alternativeMappings = fromArrow(children);
  final GroupType unionGroup =
      addToBuilder(alternativeMappings, Types.buildGroup(OPTIONAL)).named(fieldName);
  return new UnionTypeMapping(field, unionGroup, alternativeMappings);
}
/**
 * Converts an Arrow map into an optional Parquet map named after the field. The first child
 * is converted under the name "key" and the second under the name "value".
 *
 * @param map the Arrow map type (not inspected; the children come from the field)
 * @return the map mapping for the current field
 * @throws IllegalArgumentException if the field does not have exactly two children
 */
@Override
public TypeMapping visit(ArrowType.Map map) {
  final int childCount = children.size();
  if (childCount != 2) {
    throw new IllegalArgumentException("Map fields must have exactly two children: " + field);
  }
  final Field keyField = children.get(0);
  final Field valueField = children.get(1);
  // Convert the key before the value, matching the order of the children.
  final TypeMapping keyMapping = fromArrow(keyField, "key");
  final TypeMapping valueMapping = fromArrow(valueField, "value");
  final GroupType mapGroup = Types.optionalMap()
      .key(keyMapping.getParquetType())
      .value(valueMapping.getParquetType())
      .named(fieldName);
  return new SchemaMapping.MapTypeMapping(field, new Map3Levels(mapGroup), keyMapping, valueMapping);
}
/**
 * Converts an Arrow integer. Widths of 8, 16 and 32 bits are stored as INT32 and 64 bits as
 * INT64, each annotated with an integer logical type carrying the width and signedness.
 *
 * @param type the Arrow integer type
 * @return the primitive mapping for the current field
 * @throws IllegalArgumentException if the bit width is not 8, 16, 32 or 64
 */
@Override
public TypeMapping visit(Int type) {
  final boolean signed = type.getIsSigned();
  final int bitWidth = type.getBitWidth();
  if (bitWidth == 8 || bitWidth == 16 || bitWidth == 32) {
    return primitive(INT32, intType(bitWidth, signed));
  }
  if (bitWidth == 64) {
    return primitive(INT64, intType(64, signed));
  }
  throw new IllegalArgumentException("Illegal int type: " + field);
}
/**
 * Converts an Arrow floating-point type. HALF and SINGLE precision are both stored as FLOAT;
 * DOUBLE precision is stored as DOUBLE. No logical type annotation is attached.
 *
 * @param type the Arrow floating-point type
 * @return the primitive mapping for the current field
 * @throws IllegalArgumentException if the precision is none of HALF, SINGLE or DOUBLE
 */
@Override
public TypeMapping visit(FloatingPoint type) {
  final PrimitiveTypeName physicalType;
  switch (type.getPrecision()) {
    case HALF:
      // TODO(PARQUET-757): original type HalfFloat
      // Until then half precision shares the FLOAT mapping below.
    case SINGLE:
      physicalType = FLOAT;
      break;
    case DOUBLE:
      physicalType = DOUBLE;
      break;
    default:
      throw new IllegalArgumentException("Illegal float type: " + field);
  }
  return primitive(physicalType);
}
/**
 * Converts an Arrow UTF-8 string into a BINARY column carrying the string logical type.
 *
 * @param type the Arrow UTF-8 type (not inspected)
 * @return the primitive mapping for the current field
 */
@Override
public TypeMapping visit(Utf8 type) {
  final LogicalTypeAnnotation stringAnnotation = stringType();
  return primitive(BINARY, stringAnnotation);
}
/**
 * Converts an Arrow large UTF-8 string into a BINARY column carrying the string logical
 * type, exactly as for the regular UTF-8 type.
 *
 * @param largeUtf8 the Arrow large UTF-8 type (not inspected)
 * @return the primitive mapping for the current field
 */
@Override
public TypeMapping visit(ArrowType.LargeUtf8 largeUtf8) {
  final LogicalTypeAnnotation stringAnnotation = stringType();
  return primitive(BINARY, stringAnnotation);
}
/**
 * Converts an Arrow UTF-8 view into a BINARY column carrying the string logical type,
 * exactly as for the regular UTF-8 type.
 *
 * @param type the Arrow UTF-8 view type (not inspected)
 * @return the primitive mapping for the current field
 */
@Override
public TypeMapping visit(ArrowType.Utf8View type) {
  final LogicalTypeAnnotation stringAnnotation = stringType();
  return primitive(BINARY, stringAnnotation);
}
/**
 * Converts Arrow variable-length binary into a BINARY column without a logical type
 * annotation.
 *
 * @param type the Arrow binary type (not inspected)
 * @return the primitive mapping for the current field
 */
@Override
public TypeMapping visit(Binary type) {
  final TypeMapping binaryMapping = primitive(BINARY);
  return binaryMapping;
}
/**
 * Converts an Arrow binary view into a BINARY column without a logical type annotation.
 *
 * @param type the Arrow binary view type (not inspected)
 * @return the primitive mapping for the current field
 */
@Override
public TypeMapping visit(ArrowType.BinaryView type) {
  final TypeMapping binaryViewMapping = primitive(BINARY);
  return binaryViewMapping;
}
/**
 * Converts Arrow large binary into a BINARY column without a logical type annotation.
 *
 * @param largeBinary the Arrow large binary type (not inspected)
 * @return the primitive mapping for the current field
 */
@Override
public TypeMapping visit(ArrowType.LargeBinary largeBinary) {
  final TypeMapping largeBinaryMapping = primitive(BINARY);
  return largeBinaryMapping;
}
/**
 * Converts an Arrow boolean into a BOOLEAN column.
 *
 * @param type the Arrow boolean type (not inspected)
 * @return the primitive mapping for the current field
 */
@Override
public TypeMapping visit(Bool type) {
  final TypeMapping booleanMapping = primitive(BOOLEAN);
  return booleanMapping;
}
/**
 * Picks the Parquet physical type for an Arrow decimal from its precision: 1 to 9 digits use
 * INT32, 10 to 18 digits use INT64, and any other precision uses BINARY.
 *
 * See https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#decimal
 * @param type an arrow decimal type
 * @return a mapping from the arrow decimal to the Parquet type
 */
@Override
public TypeMapping visit(Decimal type) {
  final int precision = type.getPrecision();
  final int scale = type.getScale();
  final PrimitiveTypeName physicalType;
  if (precision < 1 || precision > 18) {
    // Better: FIXED_LENGTH_BYTE_ARRAY with length
    physicalType = BINARY;
  } else if (precision <= 9) {
    physicalType = INT32;
  } else {
    physicalType = INT64;
  }
  return decimal(physicalType, precision, scale);
}
/**
 * Converts an Arrow date into an INT32 column carrying the date logical type. The Arrow date
 * unit is not consulted.
 *
 * @param type the Arrow date type (not inspected)
 * @return the primitive mapping for the current field
 */
@Override
public TypeMapping visit(Date type) {
  final LogicalTypeAnnotation dateAnnotation = dateType();
  return primitive(INT32, dateAnnotation);
}
/**
 * Converts an Arrow time-of-day type. Supported combinations are 32-bit milliseconds (INT32)
 * and 64-bit microseconds or nanoseconds (INT64). The time annotation is always created with
 * {@code false} as its first argument.
 *
 * @param type the Arrow time type
 * @return the primitive mapping for the current field
 * @throws UnsupportedOperationException for any other bit width / unit combination
 */
@Override
public TypeMapping visit(Time type) {
  final int bits = type.getBitWidth();
  final TimeUnit unit = type.getUnit();
  if (bits == 32) {
    if (unit == TimeUnit.MILLISECOND) {
      return primitive(INT32, timeType(false, MILLIS));
    }
  } else if (bits == 64) {
    if (unit == TimeUnit.MICROSECOND) {
      return primitive(INT64, timeType(false, MICROS));
    }
    if (unit == TimeUnit.NANOSECOND) {
      return primitive(INT64, timeType(false, NANOS));
    }
  }
  throw new UnsupportedOperationException("Unsupported type " + type);
}
/**
 * Converts an Arrow timestamp with millisecond, microsecond or nanosecond resolution into an
 * INT64 column carrying a timestamp annotation of the same resolution. The annotation's UTC
 * flag is taken from {@code isUtcNormalized(type)}.
 *
 * @param type the Arrow timestamp type
 * @return the primitive mapping for the current field
 * @throws UnsupportedOperationException if the unit is none of the three supported ones
 */
@Override
public TypeMapping visit(Timestamp type) {
  final TimeUnit arrowUnit = type.getUnit();
  final LogicalTypeAnnotation.TimeUnit parquetUnit;
  if (arrowUnit == TimeUnit.MILLISECOND) {
    parquetUnit = MILLIS;
  } else if (arrowUnit == TimeUnit.MICROSECOND) {
    parquetUnit = MICROS;
  } else if (arrowUnit == TimeUnit.NANOSECOND) {
    parquetUnit = NANOS;
  } else {
    throw new UnsupportedOperationException("Unsupported type " + type);
  }
  return primitive(INT64, timestampType(isUtcNormalized(type), parquetUnit));
}
/**
 * Reports whether the timestamp carries a timezone.
 *
 * @param timestamp the Arrow timestamp type to inspect
 * @return {@code true} when the timezone string is neither null nor empty
 */
private boolean isUtcNormalized(Timestamp timestamp) {
  final String zone = timestamp.getTimezone();
  if (zone == null || zone.isEmpty()) {
    return false;
  }
  return true;
}
/**
 * Converts an Arrow interval into a 12-byte FIXED_LEN_BYTE_ARRAY column carrying the interval
 * logical type. The Arrow interval unit is not consulted.
 *
 * See https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#interval
 */
@Override
public TypeMapping visit(Interval type) {
  // TODO(PARQUET-675): fix interval original types
  final LogicalTypeAnnotation intervalAnnotation =
      LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance();
  return primitiveFLBA(12, intervalAnnotation);
}
/**
 * Converts an Arrow duration into a 12-byte FIXED_LEN_BYTE_ARRAY column carrying the interval
 * logical type, i.e. the same Parquet shape used for Arrow intervals.
 *
 * @param duration the Arrow duration type (not inspected)
 * @return the primitive mapping for the current field
 */
@Override
public TypeMapping visit(ArrowType.Duration duration) {
  // NOTE(review): the duration unit is ignored here; confirm that reusing the interval
  // annotation is the intended representation.
  final LogicalTypeAnnotation intervalAnnotation =
      LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance();
  return primitiveFLBA(12, intervalAnnotation);
}
/**
 * Handles Arrow extension types by deferring to the default implementation inherited from
 * {@code ArrowTypeVisitor}.
 *
 * @param type the Arrow extension type
 * @return whatever the default visitor implementation returns for this type
 */
@Override
public TypeMapping visit(ArrowType.ExtensionType type) {
  final TypeMapping inheritedResult = ArrowTypeVisitor.super.visit(type);
  return inheritedResult;
}
/**
 * Converts Arrow fixed-size binary into a BINARY column without a logical type annotation;
 * the fixed byte width is not carried over.
 *
 * @param fixedSizeBinary the Arrow fixed-size binary type (not inspected)
 * @return the primitive mapping for the current field
 */
@Override
public TypeMapping visit(ArrowType.FixedSizeBinary fixedSizeBinary) {
  final TypeMapping fixedSizeBinaryMapping = primitive(BINARY);
  return fixedSizeBinaryMapping;
}
/**
 * Pairs the Arrow field being converted with the given Parquet primitive type.
 *
 * @param parquetType the Parquet primitive type chosen for the field
 * @return a primitive mapping linking the field to {@code parquetType}
 */
private TypeMapping mapping(PrimitiveType parquetType) {
  final PrimitiveTypeMapping primitiveMapping = new PrimitiveTypeMapping(field, parquetType);
  return primitiveMapping;
}
/**
 * Builds an optional column of the given physical type, annotated as a decimal and named
 * after the field.
 *
 * @param type the Parquet physical type that stores the decimal
 * @param precision the decimal precision
 * @param scale the decimal scale
 * @return the primitive mapping for the current field
 */
private TypeMapping decimal(PrimitiveTypeName type, int precision, int scale) {
  // decimalType takes the scale first and the precision second.
  final LogicalTypeAnnotation decimalAnnotation = decimalType(scale, precision);
  final PrimitiveType decimalColumn =
      Types.optional(type).as(decimalAnnotation).named(fieldName);
  return mapping(decimalColumn);
}
/**
 * Builds an optional column of the given physical type, without a logical type annotation,
 * named after the field.
 *
 * @param type the Parquet physical type
 * @return the primitive mapping for the current field
 */
private TypeMapping primitive(PrimitiveTypeName type) {
  final PrimitiveType plainColumn = Types.optional(type).named(fieldName);
  return mapping(plainColumn);
}
/**
 * Builds an optional column of the given physical type, annotated with the given logical
 * type and named after the field.
 *
 * @param type the Parquet physical type
 * @param otype the logical type annotation to attach
 * @return the primitive mapping for the current field
 */
private TypeMapping primitive(PrimitiveTypeName type, LogicalTypeAnnotation otype) {
  final PrimitiveType annotatedColumn = Types.optional(type).as(otype).named(fieldName);
  return mapping(annotatedColumn);
}
/**
 * Builds an optional FIXED_LEN_BYTE_ARRAY column of the given length, annotated with the
 * given logical type and named after the field.
 *
 * @param length the fixed length passed to the type builder
 * @param otype the logical type annotation to attach
 * @return the primitive mapping for the current field
 */
private TypeMapping primitiveFLBA(int length, LogicalTypeAnnotation otype) {
  final PrimitiveType fixedLengthColumn =
      Types.optional(FIXED_LEN_BYTE_ARRAY).length(length).as(otype).named(fieldName);
  return mapping(fixedLengthColumn);
}
});
}