hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java [275:593]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            batchSize);
      case TIMESTAMP_WITHOUT_TIME_ZONE:
        HeapTimestampVector tv = new HeapTimestampVector(batchSize);
        if (value == null) {
          tv.fillWithNulls();
        } else {
          tv.fill(TimestampData.fromLocalDateTime((LocalDateTime) value));
        }
        return tv;
      case ARRAY:
        HeapArrayVector arrayVector = new HeapArrayVector(batchSize);
        if (value == null) {
          arrayVector.fillWithNulls();
          return arrayVector;
        } else {
          throw new UnsupportedOperationException("Unsupported create array with default value.");
        }
      case MAP:
        HeapMapColumnVector mapVector = new HeapMapColumnVector(batchSize, null, null);
        if (value == null) {
          mapVector.fillWithNulls();
          return mapVector;
        } else {
          throw new UnsupportedOperationException("Unsupported create map with default value.");
        }
      case ROW:
        HeapRowColumnVector rowVector = new HeapRowColumnVector(batchSize);
        if (value == null) {
          rowVector.fillWithNulls();
          return rowVector;
        } else {
          throw new UnsupportedOperationException("Unsupported create row with default value.");
        }
      default:
        throw new UnsupportedOperationException("Unsupported type: " + type);
    }
  }

  private static List<ColumnDescriptor> filterDescriptors(int depth, Type type, List<ColumnDescriptor> columns) throws ParquetRuntimeException {
    List<ColumnDescriptor> filtered = new ArrayList<>();
    for (ColumnDescriptor descriptor : columns) {
      if (depth >= descriptor.getPath().length) {
        throw new InvalidSchemaException("Expect depth " + depth + " for schema: " + descriptor);
      }
      if (type.getName().equals(descriptor.getPath()[depth])) {
        filtered.add(descriptor);
      }
    }
    ValidationUtils.checkState(filtered.size() > 0, "Corrupted Parquet schema");
    return filtered;
  }

  public static ColumnReader createColumnReader(
      boolean utcTimestamp,
      LogicalType fieldType,
      Type physicalType,
      List<ColumnDescriptor> descriptors,
      PageReadStore pages) throws IOException {
    return createColumnReader(utcTimestamp, fieldType, physicalType, descriptors,
        pages, 0);
  }

  private static ColumnReader createColumnReader(
      boolean utcTimestamp,
      LogicalType fieldType,
      Type physicalType,
      List<ColumnDescriptor> columns,
      PageReadStore pages,
      int depth) throws IOException {
    List<ColumnDescriptor> descriptors = filterDescriptors(depth, physicalType, columns);
    ColumnDescriptor descriptor = descriptors.get(0);
    PageReader pageReader = pages.getPageReader(descriptor);
    switch (fieldType.getTypeRoot()) {
      case BOOLEAN:
        return new BooleanColumnReader(descriptor, pageReader);
      case TINYINT:
        return new ByteColumnReader(descriptor, pageReader);
      case DOUBLE:
        return new DoubleColumnReader(descriptor, pageReader);
      case FLOAT:
        return new FloatColumnReader(descriptor, pageReader);
      case INTEGER:
      case DATE:
      case TIME_WITHOUT_TIME_ZONE:
        return new IntColumnReader(descriptor, pageReader);
      case BIGINT:
        return new LongColumnReader(descriptor, pageReader);
      case SMALLINT:
        return new ShortColumnReader(descriptor, pageReader);
      case CHAR:
      case VARCHAR:
      case BINARY:
      case VARBINARY:
        return new BytesColumnReader(descriptor, pageReader);
      case TIMESTAMP_WITHOUT_TIME_ZONE:
      case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
        switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) {
          case INT64:
            int precision = fieldType instanceof TimestampType
                ? ((TimestampType) fieldType).getPrecision()
                : ((LocalZonedTimestampType) fieldType).getPrecision();
            return new Int64TimestampColumnReader(utcTimestamp, descriptor, pageReader, precision);
          case INT96:
            return new TimestampColumnReader(utcTimestamp, descriptor, pageReader);
          default:
            throw new AssertionError();
        }
      case DECIMAL:
        switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) {
          case INT32:
            return new IntColumnReader(descriptor, pageReader);
          case INT64:
            return new LongColumnReader(descriptor, pageReader);
          case BINARY:
            return new BytesColumnReader(descriptor, pageReader);
          case FIXED_LEN_BYTE_ARRAY:
            return new FixedLenBytesColumnReader(
                descriptor, pageReader);
          default:
            throw new AssertionError();
        }
      case ARRAY:
        return new ArrayColumnReader(
            descriptor,
            pageReader,
            utcTimestamp,
            descriptor.getPrimitiveType(),
            fieldType);
      case MAP:
        MapType mapType = (MapType) fieldType;
        ArrayColumnReader keyReader =
            new ArrayColumnReader(
                descriptor,
                pageReader,
                utcTimestamp,
                descriptor.getPrimitiveType(),
                new ArrayType(mapType.getKeyType()));
        ArrayColumnReader valueReader =
            new ArrayColumnReader(
                descriptors.get(1),
                pages.getPageReader(descriptors.get(1)),
                utcTimestamp,
                descriptors.get(1).getPrimitiveType(),
                new ArrayType(mapType.getValueType()));
        return new MapColumnReader(keyReader, valueReader, fieldType);
      case ROW:
        RowType rowType = (RowType) fieldType;
        GroupType groupType = physicalType.asGroupType();
        List<ColumnReader> fieldReaders = new ArrayList<>();
        for (int i = 0; i < rowType.getFieldCount(); i++) {
          // schema evolution: read the parquet file with a new extended field name.
          int fieldIndex = getFieldIndexInPhysicalType(rowType.getFields().get(i).getName(), groupType);
          if (fieldIndex < 0) {
            fieldReaders.add(new EmptyColumnReader());
          } else {
            fieldReaders.add(
                createColumnReader(
                    utcTimestamp,
                    rowType.getTypeAt(i),
                    groupType.getType(fieldIndex),
                    descriptors,
                    pages,
                    depth + 1));
          }
        }
        return new RowColumnReader(fieldReaders);
      default:
        throw new UnsupportedOperationException(fieldType + " is not supported now.");
    }
  }

  public static WritableColumnVector createWritableColumnVector(
      int batchSize,
      LogicalType fieldType,
      Type physicalType,
      List<ColumnDescriptor> descriptors) {
    return createWritableColumnVector(batchSize, fieldType, physicalType, descriptors, 0);
  }

  private static WritableColumnVector createWritableColumnVector(
      int batchSize,
      LogicalType fieldType,
      Type physicalType,
      List<ColumnDescriptor> columns,
      int depth) {
    List<ColumnDescriptor> descriptors = filterDescriptors(depth, physicalType, columns);
    PrimitiveType primitiveType = descriptors.get(0).getPrimitiveType();
    PrimitiveType.PrimitiveTypeName typeName = primitiveType.getPrimitiveTypeName();
    switch (fieldType.getTypeRoot()) {
      case BOOLEAN:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.BOOLEAN, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapBooleanVector(batchSize);
      case TINYINT:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapByteVector(batchSize);
      case DOUBLE:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.DOUBLE, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapDoubleVector(batchSize);
      case FLOAT:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.FLOAT, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapFloatVector(batchSize);
      case INTEGER:
      case DATE:
      case TIME_WITHOUT_TIME_ZONE:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapIntVector(batchSize);
      case BIGINT:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.INT64, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapLongVector(batchSize);
      case SMALLINT:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapShortVector(batchSize);
      case CHAR:
      case VARCHAR:
      case BINARY:
      case VARBINARY:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.BINARY, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapBytesVector(batchSize);
      case TIMESTAMP_WITHOUT_TIME_ZONE:
      case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
        checkArgument(primitiveType.getOriginalType() != OriginalType.TIME_MICROS,
                getOriginalTypeCheckFailureMessage(primitiveType.getOriginalType(), fieldType));
        return new HeapTimestampVector(batchSize);
      case DECIMAL:
        checkArgument(
            (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY
                || typeName == PrimitiveType.PrimitiveTypeName.BINARY)
                && primitiveType.getOriginalType() == OriginalType.DECIMAL,
                getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapDecimalVector(batchSize);
      case ARRAY:
        ArrayType arrayType = (ArrayType) fieldType;
        return new HeapArrayVector(
            batchSize,
            createWritableColumnVector(
                batchSize,
                arrayType.getElementType(),
                physicalType,
                descriptors,
                depth));
      case MAP:
        MapType mapType = (MapType) fieldType;
        GroupType repeatedType = physicalType.asGroupType().getType(0).asGroupType();
        // the map column has three level paths.
        return new HeapMapColumnVector(
            batchSize,
            createWritableColumnVector(
                batchSize,
                mapType.getKeyType(),
                repeatedType.getType(0),
                descriptors,
                depth + 2),
            createWritableColumnVector(
                batchSize,
                mapType.getValueType(),
                repeatedType.getType(1),
                descriptors,
                depth + 2));
      case ROW:
        RowType rowType = (RowType) fieldType;
        GroupType groupType = physicalType.asGroupType();
        WritableColumnVector[] columnVectors = new WritableColumnVector[rowType.getFieldCount()];
        for (int i = 0; i < columnVectors.length; i++) {
          // schema evolution: read the file with a new extended field name.
          int fieldIndex = getFieldIndexInPhysicalType(rowType.getFields().get(i).getName(), groupType);
          if (fieldIndex < 0) {
            columnVectors[i] = (WritableColumnVector) createVectorFromConstant(rowType.getTypeAt(i), null, batchSize);
          } else {
            columnVectors[i] =
                createWritableColumnVector(
                    batchSize,
                    rowType.getTypeAt(i),
                    groupType.getType(fieldIndex),
                    descriptors,
                    depth + 1);
          }
        }
        return new HeapRowColumnVector(batchSize, columnVectors);
      default:
        throw new UnsupportedOperationException(fieldType + " is not supported now.");
    }
  }

  /**
   * Returns the field index with given physical row type {@code groupType} and field name {@code fieldName}.
   *
   * @return The physical field index or -1 if the field does not exist
   */
  private static int getFieldIndexInPhysicalType(String fieldName, GroupType groupType) {
    // get index from fileSchema type, else, return -1
    return groupType.containsField(fieldName) ? groupType.getFieldIndex(fieldName) : -1;
  }

  /**
   * Construct the error message when primitive type mismatches.
   * @param primitiveType Primitive type
   * @param fieldType Logical field type
   * @return The error message
   */
  private static String getPrimitiveTypeCheckFailureMessage(PrimitiveType.PrimitiveTypeName primitiveType, LogicalType fieldType) {
    return String.format("Unexpected type exception. Primitive type: %s. Field type: %s.", primitiveType, fieldType.getTypeRoot().name());
  }

  /**
   * Construct the error message when original type mismatches.
   * @param originalType Original type
   * @param fieldType Logical field type
   * @return The error message
   */
  private static String getOriginalTypeCheckFailureMessage(OriginalType originalType, LogicalType fieldType) {
    return String.format("Unexpected type exception. Original type: %s. Field type: %s.", originalType, fieldType.getTypeRoot().name());
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java [275:593]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            batchSize);
      case TIMESTAMP_WITHOUT_TIME_ZONE:
        HeapTimestampVector tv = new HeapTimestampVector(batchSize);
        if (value == null) {
          tv.fillWithNulls();
        } else {
          tv.fill(TimestampData.fromLocalDateTime((LocalDateTime) value));
        }
        return tv;
      case ARRAY:
        HeapArrayVector arrayVector = new HeapArrayVector(batchSize);
        if (value == null) {
          arrayVector.fillWithNulls();
          return arrayVector;
        } else {
          throw new UnsupportedOperationException("Unsupported create array with default value.");
        }
      case MAP:
        HeapMapColumnVector mapVector = new HeapMapColumnVector(batchSize, null, null);
        if (value == null) {
          mapVector.fillWithNulls();
          return mapVector;
        } else {
          throw new UnsupportedOperationException("Unsupported create map with default value.");
        }
      case ROW:
        HeapRowColumnVector rowVector = new HeapRowColumnVector(batchSize);
        if (value == null) {
          rowVector.fillWithNulls();
          return rowVector;
        } else {
          throw new UnsupportedOperationException("Unsupported create row with default value.");
        }
      default:
        throw new UnsupportedOperationException("Unsupported type: " + type);
    }
  }

  private static List<ColumnDescriptor> filterDescriptors(int depth, Type type, List<ColumnDescriptor> columns) throws ParquetRuntimeException {
    List<ColumnDescriptor> filtered = new ArrayList<>();
    for (ColumnDescriptor descriptor : columns) {
      if (depth >= descriptor.getPath().length) {
        throw new InvalidSchemaException("Expect depth " + depth + " for schema: " + descriptor);
      }
      if (type.getName().equals(descriptor.getPath()[depth])) {
        filtered.add(descriptor);
      }
    }
    ValidationUtils.checkState(filtered.size() > 0, "Corrupted Parquet schema");
    return filtered;
  }

  public static ColumnReader createColumnReader(
      boolean utcTimestamp,
      LogicalType fieldType,
      Type physicalType,
      List<ColumnDescriptor> descriptors,
      PageReadStore pages) throws IOException {
    return createColumnReader(utcTimestamp, fieldType, physicalType, descriptors,
        pages, 0);
  }

  private static ColumnReader createColumnReader(
      boolean utcTimestamp,
      LogicalType fieldType,
      Type physicalType,
      List<ColumnDescriptor> columns,
      PageReadStore pages,
      int depth) throws IOException {
    List<ColumnDescriptor> descriptors = filterDescriptors(depth, physicalType, columns);
    ColumnDescriptor descriptor = descriptors.get(0);
    PageReader pageReader = pages.getPageReader(descriptor);
    switch (fieldType.getTypeRoot()) {
      case BOOLEAN:
        return new BooleanColumnReader(descriptor, pageReader);
      case TINYINT:
        return new ByteColumnReader(descriptor, pageReader);
      case DOUBLE:
        return new DoubleColumnReader(descriptor, pageReader);
      case FLOAT:
        return new FloatColumnReader(descriptor, pageReader);
      case INTEGER:
      case DATE:
      case TIME_WITHOUT_TIME_ZONE:
        return new IntColumnReader(descriptor, pageReader);
      case BIGINT:
        return new LongColumnReader(descriptor, pageReader);
      case SMALLINT:
        return new ShortColumnReader(descriptor, pageReader);
      case CHAR:
      case VARCHAR:
      case BINARY:
      case VARBINARY:
        return new BytesColumnReader(descriptor, pageReader);
      case TIMESTAMP_WITHOUT_TIME_ZONE:
      case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
        switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) {
          case INT64:
            int precision = fieldType instanceof TimestampType
                ? ((TimestampType) fieldType).getPrecision()
                : ((LocalZonedTimestampType) fieldType).getPrecision();
            return new Int64TimestampColumnReader(utcTimestamp, descriptor, pageReader, precision);
          case INT96:
            return new TimestampColumnReader(utcTimestamp, descriptor, pageReader);
          default:
            throw new AssertionError();
        }
      case DECIMAL:
        switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) {
          case INT32:
            return new IntColumnReader(descriptor, pageReader);
          case INT64:
            return new LongColumnReader(descriptor, pageReader);
          case BINARY:
            return new BytesColumnReader(descriptor, pageReader);
          case FIXED_LEN_BYTE_ARRAY:
            return new FixedLenBytesColumnReader(
                descriptor, pageReader);
          default:
            throw new AssertionError();
        }
      case ARRAY:
        return new ArrayColumnReader(
            descriptor,
            pageReader,
            utcTimestamp,
            descriptor.getPrimitiveType(),
            fieldType);
      case MAP:
        MapType mapType = (MapType) fieldType;
        ArrayColumnReader keyReader =
            new ArrayColumnReader(
                descriptor,
                pageReader,
                utcTimestamp,
                descriptor.getPrimitiveType(),
                new ArrayType(mapType.getKeyType()));
        ArrayColumnReader valueReader =
            new ArrayColumnReader(
                descriptors.get(1),
                pages.getPageReader(descriptors.get(1)),
                utcTimestamp,
                descriptors.get(1).getPrimitiveType(),
                new ArrayType(mapType.getValueType()));
        return new MapColumnReader(keyReader, valueReader, fieldType);
      case ROW:
        RowType rowType = (RowType) fieldType;
        GroupType groupType = physicalType.asGroupType();
        List<ColumnReader> fieldReaders = new ArrayList<>();
        for (int i = 0; i < rowType.getFieldCount(); i++) {
          // schema evolution: read the parquet file with a new extended field name.
          int fieldIndex = getFieldIndexInPhysicalType(rowType.getFields().get(i).getName(), groupType);
          if (fieldIndex < 0) {
            fieldReaders.add(new EmptyColumnReader());
          } else {
            fieldReaders.add(
                createColumnReader(
                    utcTimestamp,
                    rowType.getTypeAt(i),
                    groupType.getType(fieldIndex),
                    descriptors,
                    pages,
                    depth + 1));
          }
        }
        return new RowColumnReader(fieldReaders);
      default:
        throw new UnsupportedOperationException(fieldType + " is not supported now.");
    }
  }

  public static WritableColumnVector createWritableColumnVector(
      int batchSize,
      LogicalType fieldType,
      Type physicalType,
      List<ColumnDescriptor> descriptors) {
    return createWritableColumnVector(batchSize, fieldType, physicalType, descriptors, 0);
  }

  private static WritableColumnVector createWritableColumnVector(
      int batchSize,
      LogicalType fieldType,
      Type physicalType,
      List<ColumnDescriptor> columns,
      int depth) {
    List<ColumnDescriptor> descriptors = filterDescriptors(depth, physicalType, columns);
    PrimitiveType primitiveType = descriptors.get(0).getPrimitiveType();
    PrimitiveType.PrimitiveTypeName typeName = primitiveType.getPrimitiveTypeName();
    switch (fieldType.getTypeRoot()) {
      case BOOLEAN:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.BOOLEAN, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapBooleanVector(batchSize);
      case TINYINT:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapByteVector(batchSize);
      case DOUBLE:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.DOUBLE, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapDoubleVector(batchSize);
      case FLOAT:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.FLOAT, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapFloatVector(batchSize);
      case INTEGER:
      case DATE:
      case TIME_WITHOUT_TIME_ZONE:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapIntVector(batchSize);
      case BIGINT:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.INT64, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapLongVector(batchSize);
      case SMALLINT:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapShortVector(batchSize);
      case CHAR:
      case VARCHAR:
      case BINARY:
      case VARBINARY:
        checkArgument(
            typeName == PrimitiveType.PrimitiveTypeName.BINARY, getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapBytesVector(batchSize);
      case TIMESTAMP_WITHOUT_TIME_ZONE:
      case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
        checkArgument(primitiveType.getOriginalType() != OriginalType.TIME_MICROS,
                getOriginalTypeCheckFailureMessage(primitiveType.getOriginalType(), fieldType));
        return new HeapTimestampVector(batchSize);
      case DECIMAL:
        checkArgument(
            (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY
                || typeName == PrimitiveType.PrimitiveTypeName.BINARY)
                && primitiveType.getOriginalType() == OriginalType.DECIMAL,
                getPrimitiveTypeCheckFailureMessage(typeName, fieldType));
        return new HeapDecimalVector(batchSize);
      case ARRAY:
        ArrayType arrayType = (ArrayType) fieldType;
        return new HeapArrayVector(
            batchSize,
            createWritableColumnVector(
                batchSize,
                arrayType.getElementType(),
                physicalType,
                descriptors,
                depth));
      case MAP:
        MapType mapType = (MapType) fieldType;
        GroupType repeatedType = physicalType.asGroupType().getType(0).asGroupType();
        // the map column has three level paths.
        return new HeapMapColumnVector(
            batchSize,
            createWritableColumnVector(
                batchSize,
                mapType.getKeyType(),
                repeatedType.getType(0),
                descriptors,
                depth + 2),
            createWritableColumnVector(
                batchSize,
                mapType.getValueType(),
                repeatedType.getType(1),
                descriptors,
                depth + 2));
      case ROW:
        RowType rowType = (RowType) fieldType;
        GroupType groupType = physicalType.asGroupType();
        WritableColumnVector[] columnVectors = new WritableColumnVector[rowType.getFieldCount()];
        for (int i = 0; i < columnVectors.length; i++) {
          // schema evolution: read the file with a new extended field name.
          int fieldIndex = getFieldIndexInPhysicalType(rowType.getFields().get(i).getName(), groupType);
          if (fieldIndex < 0) {
            columnVectors[i] = (WritableColumnVector) createVectorFromConstant(rowType.getTypeAt(i), null, batchSize);
          } else {
            columnVectors[i] =
                createWritableColumnVector(
                    batchSize,
                    rowType.getTypeAt(i),
                    groupType.getType(fieldIndex),
                    descriptors,
                    depth + 1);
          }
        }
        return new HeapRowColumnVector(batchSize, columnVectors);
      default:
        throw new UnsupportedOperationException(fieldType + " is not supported now.");
    }
  }

  /**
   * Returns the field index with given physical row type {@code groupType} and field name {@code fieldName}.
   *
   * @return The physical field index or -1 if the field does not exist
   */
  private static int getFieldIndexInPhysicalType(String fieldName, GroupType groupType) {
    // get index from fileSchema type, else, return -1
    return groupType.containsField(fieldName) ? groupType.getFieldIndex(fieldName) : -1;
  }

  /**
   * Construct the error message when primitive type mismatches.
   * @param primitiveType Primitive type
   * @param fieldType Logical field type
   * @return The error message
   */
  private static String getPrimitiveTypeCheckFailureMessage(PrimitiveType.PrimitiveTypeName primitiveType, LogicalType fieldType) {
    return String.format("Unexpected type exception. Primitive type: %s. Field type: %s.", primitiveType, fieldType.getTypeRoot().name());
  }

  /**
   * Construct the error message when original type mismatches.
   * @param originalType Original type
   * @param fieldType Logical field type
   * @return The error message
   */
  private static String getOriginalTypeCheckFailureMessage(OriginalType originalType, LogicalType fieldType) {
    return String.format("Unexpected type exception. Original type: %s. Field type: %s.", originalType, fieldType.getTypeRoot().name());
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



