in orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java [309:407]
/**
 * Recursively builds the ORC type used to read one Iceberg type.
 *
 * <p>Nested types (struct, list, map) are rebuilt from their projected children. A variant is
 * always laid out as a two-field binary struct. Every other type either reuses (or promotes) the
 * ORC type registered in {@code mapping} for {@code fieldId}, or, when no entry exists, is
 * converted from the Iceberg type. A non-null result is tagged with the Iceberg field ID.
 *
 * @param root schema used to look up fields and column names by ID
 * @param fieldId Iceberg field ID of {@code type}
 * @param type Iceberg type to project
 * @param isRequired true only while this field and every enclosing field visited so far is
 *     required
 * @param mapping ORC fields keyed by Iceberg field ID (presumably the columns of the file being
 *     read; confirm against the caller)
 * @return the projected ORC type; may be null (presumably when {@code convert} produces no ORC
 *     type, e.g. for an unknown type; confirm against {@code convert})
 * @throws IllegalArgumentException (via {@code Preconditions.checkArgument}) if a list element,
 *     map key or map value projects to null, if a mapped type can be neither promoted nor
 *     matched, or if a required field is absent from {@code mapping} and has no initial default
 * @throws UnsupportedOperationException if a field absent from {@code mapping} has an initial
 *     default
 */
private static TypeDescription buildOrcProjection(
    Schema root, Integer fieldId, Type type, boolean isRequired, Map<Integer, OrcField> mapping) {
  final TypeDescription projected;
  switch (type.typeId()) {
    case STRUCT:
      projected = projectStructFields(root, type, isRequired, mapping);
      break;
    case LIST:
      projected = projectListElement(root, (Types.ListType) type, isRequired, mapping);
      break;
    case MAP:
      projected = projectMapEntries(root, (Types.MapType) type, isRequired, mapping);
      break;
    case VARIANT:
      projected = newVariantProjection();
      break;
    default:
      projected =
          mapping.containsKey(fieldId)
              ? reuseOrPromoteMappedType(fieldId, type, mapping)
              : convertUnmappedField(root, fieldId, type, isRequired);
  }

  if (projected == null) {
    return null;
  }

  projected.setAttribute(ICEBERG_ID_ATTRIBUTE, fieldId.toString());
  return projected;
}

/** Projects each child of a struct, skipping children whose projection is null. */
private static TypeDescription projectStructFields(
    Schema root, Type structType, boolean isRequired, Map<Integer, OrcField> mapping) {
  TypeDescription struct = TypeDescription.createStruct();
  for (Types.NestedField child : structType.asStructType().fields()) {
    OrcField mapped = mapping.get(child.fieldId());
    String mappedName = mapped == null ? null : mapped.name();
    // Fields without a mapped name get an "_r<id>" suffix so they cannot clash with a reused
    // column name in the ORC reader, e.g. after renaming column c -> d and adding a new column d.
    String orcName = mappedName != null ? mappedName : child.name() + "_r" + child.fieldId();

    boolean childRequired = isRequired && child.isRequired();
    TypeDescription projectedChild =
        buildOrcProjection(root, child.fieldId(), child.type(), childRequired, mapping);
    if (projectedChild != null) {
      struct.addField(orcName, projectedChild);
    }
  }

  return struct;
}

/** Projects a list; the element must project to a non-null ORC type. */
private static TypeDescription projectListElement(
    Schema root, Types.ListType listType, boolean isRequired, Map<Integer, OrcField> mapping) {
  boolean elementRequired = isRequired && listType.isElementRequired();
  TypeDescription element =
      buildOrcProjection(
          root, listType.elementId(), listType.elementType(), elementRequired, mapping);
  Preconditions.checkArgument(element != null, "Invalid element type: unknown");
  return TypeDescription.createList(element);
}

/** Projects a map; key and value are both projected before either result is validated. */
private static TypeDescription projectMapEntries(
    Schema root, Types.MapType mapType, boolean isRequired, Map<Integer, OrcField> mapping) {
  TypeDescription key =
      buildOrcProjection(root, mapType.keyId(), mapType.keyType(), isRequired, mapping);
  boolean valueRequired = isRequired && mapType.isValueRequired();
  TypeDescription value =
      buildOrcProjection(root, mapType.valueId(), mapType.valueType(), valueRequired, mapping);
  Preconditions.checkArgument(key != null, "Invalid key type: unknown");
  Preconditions.checkArgument(value != null, "Invalid value type: unknown");
  return TypeDescription.createMap(key, value);
}

/** Creates the struct of binary metadata and value fields that represents a variant. */
private static TypeDescription newVariantProjection() {
  TypeDescription variant =
      TypeDescription.createStruct()
          .addField(VARIANT_METADATA, TypeDescription.createBinary())
          .addField(VARIANT_VALUE, TypeDescription.createBinary());
  variant.setAttribute(ICEBERG_STRUCT_TYPE_ATTRIBUTE, VARIANT);
  return variant;
}

/** Returns the promoted type for a mapped field, or a copy of the mapped type if it matches. */
private static TypeDescription reuseOrPromoteMappedType(
    Integer fieldId, Type type, Map<Integer, OrcField> mapping) {
  TypeDescription mappedType = mapping.get(fieldId).type();
  Optional<TypeDescription> promoted = getPromotedType(type, mappedType);
  if (promoted.isPresent()) {
    return promoted.get();
  }

  Preconditions.checkArgument(
      isSameType(mappedType, type),
      "Can not promote %s type to %s",
      mappedType.getCategory(),
      type.typeId().name());
  return mappedType.clone();
}

/** Converts a field that has no mapping entry, rejecting required fields and initial defaults. */
private static TypeDescription convertUnmappedField(
    Schema root, Integer fieldId, Type type, boolean isRequired) {
  Types.NestedField unmapped = root.findField(fieldId);
  if (isRequired) {
    Preconditions.checkArgument(
        unmapped.initialDefault() != null,
        "Missing required field: %s (%s)",
        root.findColumnName(fieldId),
        type);
  }

  // A field with an initial default is rejected here rather than converted.
  if (unmapped.initialDefault() != null) {
    String message =
        String.format(
            "ORC cannot read default value for field %s (%s): %s",
            root.findColumnName(fieldId), type, unmapped.initialDefault());
    throw new UnsupportedOperationException(message);
  }

  return convert(fieldId, type, false);
}