private Schema calculateSchema()

in core/src/main/java/org/apache/iceberg/PositionDeletesTable.java [114:200]


  /**
   * Builds the schema for the position deletes metadata table.
   *
   * <p>The schema contains the standard position delete columns (delete file path, position, and
   * the optional deleted-row struct typed as the table's schema), plus metadata columns for the
   * partition, spec id, and the path of the delete file itself. For format version 3 and later,
   * DV-related columns (content offset and content size in bytes) are also included.
   *
   * <p>Partition field ids may collide with ids already used by the table's schemas or by the
   * metadata columns added here, so conflicting partition field ids are reassigned before the
   * schema is returned. If the table is unpartitioned, the partition column is dropped entirely.
   */
  private Schema calculateSchema() {
    int formatVersion = TableUtil.formatVersion(table());
    Types.StructType partitionType = Partitioning.partitionType(table());

    List<Types.NestedField> columns = positionDeleteColumns(formatVersion, partitionType);
    Schema result = reassignConflictingIds(columns, partitionType);

    if (partitionType.fields().isEmpty()) {
      // avoid returning an empty struct, which is not always supported.
      // instead, drop the partition field
      return TypeUtil.selectNot(result, Sets.newHashSet(MetadataColumns.PARTITION_COLUMN_ID));
    }

    return result;
  }

  /** Assembles the column list for the position deletes table, including v3-only DV columns. */
  private List<Types.NestedField> positionDeleteColumns(
      int formatVersion, Types.StructType partitionType) {
    ImmutableList.Builder<Types.NestedField> builder =
        ImmutableList.<Types.NestedField>builder()
            .add(MetadataColumns.DELETE_FILE_PATH)
            .add(MetadataColumns.DELETE_FILE_POS)
            .add(
                Types.NestedField.optional(
                    MetadataColumns.DELETE_FILE_ROW_FIELD_ID,
                    MetadataColumns.DELETE_FILE_ROW_FIELD_NAME,
                    table().schema().asStruct(),
                    MetadataColumns.DELETE_FILE_ROW_DOC))
            .add(
                Types.NestedField.required(
                    MetadataColumns.PARTITION_COLUMN_ID,
                    PARTITION,
                    partitionType,
                    "Partition that position delete row belongs to"))
            .add(
                Types.NestedField.required(
                    MetadataColumns.SPEC_ID_COLUMN_ID,
                    SPEC_ID,
                    Types.IntegerType.get(),
                    MetadataColumns.SPEC_ID_COLUMN_DOC))
            .add(
                Types.NestedField.required(
                    MetadataColumns.FILE_PATH_COLUMN_ID,
                    DELETE_FILE_PATH,
                    Types.StringType.get(),
                    MetadataColumns.FILE_PATH_COLUMN_DOC));

    // Deletion vector (DV) columns only exist in format version 3 and later.
    if (formatVersion >= 3) {
      builder
          .add(
              Types.NestedField.optional(
                  MetadataColumns.CONTENT_OFFSET_COLUMN_ID,
                  CONTENT_OFFSET,
                  Types.LongType.get(),
                  "The offset in the DV where the content starts"))
          .add(
              Types.NestedField.optional(
                  MetadataColumns.CONTENT_SIZE_IN_BYTES_COLUMN_ID,
                  CONTENT_SIZE_IN_BYTES,
                  Types.LongType.get(),
                  "The length in bytes of the DV blob"));
    }

    return builder.build();
  }

  /**
   * Wraps the columns in a {@link Schema}, reassigning any partition field id that conflicts with
   * an id used by the metadata columns or by any of the table's schemas.
   */
  private Schema reassignConflictingIds(
      List<Types.NestedField> columns, Types.StructType partitionType) {
    // Ids already taken by the columns assembled above (for de-conflict)
    Set<Integer> currentlyUsedIds =
        Collections.unmodifiableSet(TypeUtil.indexById(Types.StructType.of(columns)).keySet());
    // ...combined with the ids used across all of the table's schemas
    Set<Integer> allUsedIds =
        table().schemas().values().stream()
            .map(currSchema -> TypeUtil.indexById(currSchema.asStruct()).keySet())
            .reduce(currentlyUsedIds, Sets::union);

    // Only the partition struct's field ids are candidates for reassignment
    Set<Integer> idsToReassign =
        partitionType.fields().stream().map(Types.NestedField::fieldId).collect(Collectors.toSet());

    // Reassign selected ids to the lowest positive ids not used anywhere.
    AtomicInteger nextId = new AtomicInteger();
    return new Schema(
        columns,
        ImmutableSet.of(),
        oldId -> {
          if (!idsToReassign.contains(oldId)) {
            return oldId;
          }
          int candidate = nextId.incrementAndGet();
          while (allUsedIds.contains(candidate)) {
            candidate = nextId.incrementAndGet();
          }
          return candidate;
        });
  }