in parquet-cli/src/main/java/org/apache/parquet/cli/util/Schemas.java [286:365]
/**
 * Tries to merge two schemas into one schema without introducing a new union.
 *
 * <p>Equal schemas are returned as-is. {@code int}/{@code long} and
 * {@code float}/{@code double} pairs are promoted to the wider of the two
 * (integers are never promoted to floating point). Schemas of the same
 * complex type (union, record, map, array, enum) are merged type by type.
 *
 * @param left the first schema to merge
 * @param right the second schema to merge
 * @return the merged schema, or {@code null} when the two schemas cannot be
 *         merged directly and would have to be combined by a union instead
 * @throws UnsupportedOperationException if both schemas share a type that
 *         this method has no merge rule for
 */
private static Schema mergeOnly(Schema left, Schema right) {
  if (Objects.equal(left, right)) {
    return left;
  }

  Schema.Type leftType = left.getType();
  Schema.Type rightType = right.getType();

  // Numeric widening within the same family: keep whichever side is wider.
  boolean rightIsWider =
      (leftType == Schema.Type.INT && rightType == Schema.Type.LONG)
          || (leftType == Schema.Type.FLOAT && rightType == Schema.Type.DOUBLE);
  if (rightIsWider) {
    return right;
  }
  boolean leftIsWider =
      (leftType == Schema.Type.LONG && rightType == Schema.Type.INT)
          || (leftType == Schema.Type.DOUBLE && rightType == Schema.Type.FLOAT);
  if (leftIsWider) {
    return left;
  }

  // Every remaining type mismatch can only be expressed as a union.
  if (leftType != rightType) {
    return null;
  }

  switch (leftType) {
    case ARRAY: {
      Schema mergedElement = mergeOrUnion(left.getElementType(), right.getElementType());
      return Schema.createArray(mergedElement);
    }

    case MAP: {
      Schema mergedValue = mergeOrUnion(left.getValueType(), right.getValueType());
      return Schema.createMap(mergedValue);
    }

    case UNION:
      return union(left, right);

    case ENUM: {
      String enumName = left.getName();
      if (!Objects.equal(enumName, right.getName())) {
        return null;
      }
      // Insertion-ordered set: left symbols first, then any new right symbols.
      Set<String> allSymbols = Sets.newLinkedHashSet(left.getEnumSymbols());
      allSymbols.addAll(right.getEnumSymbols());
      return Schema.createEnum(
          enumName,
          coalesce(left.getDoc(), right.getDoc()),
          coalesce(left.getNamespace(), right.getNamespace()),
          ImmutableList.copyOf(allSymbols));
    }

    case RECORD: {
      String leftName = left.getName();
      String rightName = right.getName();
      // Records with different names are never merged.
      if (!Objects.equal(leftName, rightName)) {
        return null;
      }
      // Names are equal here, so a null left name means both are unnamed;
      // unnamed records are only merged when their fields are similar enough.
      if (leftName == null && fieldSimilarity(left, right) < SIMILARITY_THRESH) {
        return null;
      }
      Schema merged = Schema.createRecord(
          coalesce(leftName, rightName),
          coalesce(left.getDoc(), right.getDoc()),
          coalesce(left.getNamespace(), right.getNamespace()),
          false);
      merged.setFields(mergeFields(left, right));
      return merged;
    }

    default:
      // Same-typed primitives are expected to have been returned by the
      // equality check at the top, so anything arriving here is a type
      // with no merge rule above.
      throw new UnsupportedOperationException("Unknown schema type: " + leftType);
  }
}