in core/src/main/java/org/apache/iceberg/PositionDeletesTable.java [114:200]
private Schema calculateSchema() {
  int formatVersion = TableUtil.formatVersion(table());
  Types.StructType partitionType = Partitioning.partitionType(table());
  ImmutableList.Builder<Types.NestedField> builder =
      ImmutableList.<Types.NestedField>builder()
          .add(MetadataColumns.DELETE_FILE_PATH)
          .add(MetadataColumns.DELETE_FILE_POS)
          .add(
              Types.NestedField.optional(
                  MetadataColumns.DELETE_FILE_ROW_FIELD_ID,
                  MetadataColumns.DELETE_FILE_ROW_FIELD_NAME,
                  table().schema().asStruct(),
                  MetadataColumns.DELETE_FILE_ROW_DOC))
          .add(
              Types.NestedField.required(
                  MetadataColumns.PARTITION_COLUMN_ID,
                  PARTITION,
                  partitionType,
                  "Partition that position delete row belongs to"))
          .add(
              Types.NestedField.required(
                  MetadataColumns.SPEC_ID_COLUMN_ID,
                  SPEC_ID,
                  Types.IntegerType.get(),
                  MetadataColumns.SPEC_ID_COLUMN_DOC))
          .add(
              Types.NestedField.required(
                  MetadataColumns.FILE_PATH_COLUMN_ID,
                  DELETE_FILE_PATH,
                  Types.StringType.get(),
                  MetadataColumns.FILE_PATH_COLUMN_DOC));

  // In format v3, position deletes are stored as deletion vectors (DVs);
  // expose the offset and length of the DV content as well.
  if (formatVersion >= 3) {
    builder
        .add(
            Types.NestedField.optional(
                MetadataColumns.CONTENT_OFFSET_COLUMN_ID,
                CONTENT_OFFSET,
                Types.LongType.get(),
                "The offset in the DV where the content starts"))
        .add(
            Types.NestedField.optional(
                MetadataColumns.CONTENT_SIZE_IN_BYTES_COLUMN_ID,
                CONTENT_SIZE_IN_BYTES,
                Types.LongType.get(),
                "The length in bytes of the DV blob"));
  }

  List<Types.NestedField> columns = builder.build();

  // Collect the ids already in use by these columns and by all table schemas, to de-conflict against
  Set<Integer> currentlyUsedIds =
      Collections.unmodifiableSet(TypeUtil.indexById(Types.StructType.of(columns)).keySet());
  Set<Integer> allUsedIds =
      table().schemas().values().stream()
          .map(currSchema -> TypeUtil.indexById(currSchema.asStruct()).keySet())
          .reduce(currentlyUsedIds, Sets::union);

  // Partition field ids may collide with ids already in use, so they must be reassigned
  Set<Integer> idsToReassign =
      partitionType.fields().stream().map(Types.NestedField::fieldId).collect(Collectors.toSet());

  // Reassign selected ids to de-conflict with used ids.
  AtomicInteger nextId = new AtomicInteger();
  Schema result =
      new Schema(
          columns,
          ImmutableSet.of(),
          oldId -> {
            if (!idsToReassign.contains(oldId)) {
              return oldId;
            }
            int candidate = nextId.incrementAndGet();
            while (allUsedIds.contains(candidate)) {
              candidate = nextId.incrementAndGet();
            }
            return candidate;
          });

  if (!partitionType.fields().isEmpty()) {
    return result;
  } else {
    // Avoid returning an empty partition struct, which is not always supported;
    // drop the partition field instead.
    return TypeUtil.selectNot(result, Sets.newHashSet(MetadataColumns.PARTITION_COLUMN_ID));
  }
}
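
The reassignment lambda passed to the Schema constructor remaps partition field ids that collide with any id already used by the table's schemas, handing out the smallest positive ids that are still free. A minimal, self-contained sketch of that strategy outside Iceberg (the class name FieldIdReassignSketch and the hard-coded id sets are illustrative only):

import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.UnaryOperator;

public class FieldIdReassignSketch {
  public static void main(String[] args) {
    // Ids already taken by the table's schemas plus the metadata columns (illustrative values).
    Set<Integer> allUsedIds = Set.of(1, 2, 3, 1000);
    // Partition field ids that must be de-conflicted (illustrative value).
    Set<Integer> idsToReassign = Set.of(1000);

    AtomicInteger nextId = new AtomicInteger();
    UnaryOperator<Integer> reassign =
        oldId -> {
          if (!idsToReassign.contains(oldId)) {
            // Non-partition ids keep their original value.
            return oldId;
          }
          int candidate = nextId.incrementAndGet();
          while (allUsedIds.contains(candidate)) {
            // Skip ids that are already in use.
            candidate = nextId.incrementAndGet();
          }
          return candidate;
        };

    System.out.println(reassign.apply(1000)); // prints 4: ids 1-3 are taken, 4 is the first free id
    System.out.println(reassign.apply(2)); // prints 2: not a partition field id, so unchanged
  }
}

Callers normally reach this schema through the position_deletes metadata table rather than by invoking calculateSchema() directly. A hedged usage sketch, assuming MetadataTableUtils.createMetadataTableInstance(Table, MetadataTableType) and an already-loaded base table (the inspector class and method names are illustrative, not part of Iceberg):

import org.apache.iceberg.MetadataTableType;
import org.apache.iceberg.MetadataTableUtils;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;

public class PositionDeletesSchemaInspector {
  // Prints the column ids and names of the position_deletes metadata table
  // for an already-loaded base table.
  public static void printPositionDeleteColumns(Table baseTable) {
    Table positionDeletes =
        MetadataTableUtils.createMetadataTableInstance(baseTable, MetadataTableType.POSITION_DELETES);
    Schema schema = positionDeletes.schema();
    schema.columns()
        .forEach(field -> System.out.println(field.fieldId() + ": " + field.name()));
  }
}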