public static ColumnDescriptor convertToParquet(StructField field)

in common/src/main/java/org/apache/comet/parquet/TypeUtil.java [39:107]


  public static ColumnDescriptor convertToParquet(StructField field) {
    Type.Repetition repetition;
    int maxDefinitionLevel;
    if (field.nullable()) {
      repetition = Type.Repetition.OPTIONAL;
      maxDefinitionLevel = 1;
    } else {
      repetition = Type.Repetition.REQUIRED;
      maxDefinitionLevel = 0;
    }
    String[] path = new String[] {field.name()};

    DataType type = field.dataType();

    Types.PrimitiveBuilder<PrimitiveType> builder = null;
    // Only a partition column can be `NullType`, which also uses `ConstantColumnReader`. Here we
    // piggy-back on the Parquet boolean type for a constant vector of null values; we don't
    // really care which Parquet type it is.
    if (type == DataTypes.BooleanType || type == DataTypes.NullType) {
      builder = Types.primitive(PrimitiveType.PrimitiveTypeName.BOOLEAN, repetition);
    } else if (type == DataTypes.IntegerType || type instanceof YearMonthIntervalType) {
      builder =
          Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
              .as(LogicalTypeAnnotation.intType(32, true));
    } else if (type == DataTypes.DateType) {
      builder =
          Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
              .as(LogicalTypeAnnotation.dateType());
    } else if (type == DataTypes.ByteType) {
      builder =
          Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
              .as(LogicalTypeAnnotation.intType(8, true));
    } else if (type == DataTypes.ShortType) {
      builder =
          Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
              .as(LogicalTypeAnnotation.intType(16, true));
    } else if (type == DataTypes.LongType) {
      builder = Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition);
    } else if (type == DataTypes.BinaryType) {
      builder = Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition);
    } else if (type == DataTypes.StringType) {
      builder =
          Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition)
              .as(LogicalTypeAnnotation.stringType());
    } else if (type == DataTypes.FloatType) {
      builder = Types.primitive(PrimitiveType.PrimitiveTypeName.FLOAT, repetition);
    } else if (type == DataTypes.DoubleType) {
      builder = Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE, repetition);
    } else if (type == DataTypes.TimestampType) {
      builder =
          Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition)
              .as(LogicalTypeAnnotation.timestampType(true, TimeUnit.MICROS));
    } else if (type == TimestampNTZType$.MODULE$) {
      builder =
          Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition)
              .as(LogicalTypeAnnotation.timestampType(false, TimeUnit.MICROS));
    } else if (type instanceof DecimalType) {
      DecimalType decimalType = (DecimalType) type;
      builder =
          Types.primitive(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, repetition)
              .length(16) // always store as Decimal128
              .as(LogicalTypeAnnotation.decimalType(decimalType.scale(), decimalType.precision()));
    }
    if (builder == null) {
      throw new UnsupportedOperationException("Unsupported input Spark type: " + type);
    }

    // Only flat primitive columns are produced here, so the max repetition level is always 0.
    return new ColumnDescriptor(path, builder.named(field.name()), 0, maxDefinitionLevel);
  }
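
A minimal usage sketch of the conversion (the wrapper class name ConvertToParquetExample and the field name "price" are illustrative; the imports assume the standard Spark org.apache.spark.sql.types and Parquet org.apache.parquet.column packages):

  import org.apache.comet.parquet.TypeUtil;
  import org.apache.parquet.column.ColumnDescriptor;
  import org.apache.spark.sql.types.DataTypes;
  import org.apache.spark.sql.types.Metadata;
  import org.apache.spark.sql.types.StructField;

  public class ConvertToParquetExample {
    public static void main(String[] args) {
      // A nullable Spark decimal(10, 2) field; nullability maps to OPTIONAL with max definition level 1.
      StructField field =
          new StructField("price", DataTypes.createDecimalType(10, 2), true, Metadata.empty());

      ColumnDescriptor descriptor = TypeUtil.convertToParquet(field);

      // The decimal is stored as a 16-byte FIXED_LEN_BYTE_ARRAY (Decimal128) with a DECIMAL(10, 2) annotation.
      System.out.println(descriptor.getPrimitiveType());
      System.out.println("max definition level: " + descriptor.getMaxDefinitionLevel());
    }
  }

Fixing every decimal at 16 bytes (per the "always store as Decimal128" comment above) keeps the reader on a single Decimal128 code path regardless of the declared precision.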