in gobblin-utility/src/main/java/org/apache/gobblin/util/orc/AvroOrcSchemaConverter.java [30:92]
/**
 * Translates an Avro {@link Schema} into an ORC {@link TypeDescription}, recursing into
 * array element, map value, record field and union member schemas.
 *
 * <p>Scalar Avro types map onto the ORC type of the same name; {@code ENUM} is written as an
 * ORC string, {@code BYTES}/{@code FIXED} are delegated to
 * {@code getTypeDescriptionForBinarySchema}, and {@code NULL} becomes a union with no children.
 *
 * @param avroSchema the Avro schema to translate
 * @return the ORC type description built for {@code avroSchema}
 * @throws IllegalStateException if the Avro type is not one of the handled types, or if a
 *     record field unexpectedly resolves to a {@code null} ORC type
 */
public static TypeDescription getOrcSchema(Schema avroSchema) {
  final Schema.Type avroType = avroSchema.getType();
  switch (avroType) {
    // ---- scalar types ----
    case BOOLEAN:
      return TypeDescription.createBoolean();
    case INT:
      return TypeDescription.createInt();
    case LONG:
      return TypeDescription.createLong();
    case FLOAT:
      return TypeDescription.createFloat();
    case DOUBLE:
      return TypeDescription.createDouble();
    case STRING:
    case ENUM:
      // enums are represented as plain strings for now
      return TypeDescription.createString();
    case FIXED:
    case BYTES:
      return getTypeDescriptionForBinarySchema(avroSchema);
    case NULL:
      // a union without any children stands in for the Avro null type
      return TypeDescription.createUnion();

    // ---- container types ----
    case ARRAY: {
      final TypeDescription elementType = getOrcSchema(avroSchema.getElementType());
      return TypeDescription.createList(elementType);
    }
    case MAP: {
      // Avro map keys are always strings, so only the value schema needs converting
      final TypeDescription keyType = TypeDescription.createString();
      final TypeDescription valueType = getOrcSchema(avroSchema.getValueType());
      return TypeDescription.createMap(keyType, valueType);
    }
    case RECORD: {
      final TypeDescription struct = TypeDescription.createStruct();
      for (Schema.Field avroField : avroSchema.getFields()) {
        final TypeDescription orcFieldType = getOrcSchema(avroField.schema());
        if (orcFieldType == null) {
          throw new IllegalStateException("Should never get a null type as fieldType.");
        }
        struct.addField(avroField.name(), orcFieldType);
      }
      return struct;
    }
    case UNION: {
      final List<Schema> members = getNonNullMembersOfUnion(avroSchema);
      if (isNullableUnion(avroSchema, members)) {
        // Avro expresses an optional value as a union of NULL with one other type; every ORC
        // type is already nullable, so the first non-null member is used directly
        return getOrcSchema(members.get(0));
      }
      // a genuine multi-type union: mirror it as an ORC union of the non-null members
      final TypeDescription orcUnion = TypeDescription.createUnion();
      for (final Schema member : members) {
        orcUnion.addUnionChild(getOrcSchema(member));
      }
      return orcUnion;
    }

    default:
      throw new IllegalStateException(String.format("Unrecognized Avro type: %s", avroType.getName()));
  }
}