in common/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala [118:142]
/**
 * Translates a Spark SQL `DataType` into the matching `ArrowType`.
 *
 * @param dt
 *   the data type to convert
 * @param timeZoneId
 *   time zone attached to the resulting Arrow timestamp; only consulted when `dt` is
 *   `TimestampType`, in which case it must not be null
 * @return
 *   the `ArrowType` chosen for `dt`
 * @throws UnsupportedOperationException
 *   if `dt` is `TimestampType` and `timeZoneId` is null, or if `dt` is not one of the types
 *   handled below
 */
def toArrowType(dt: DataType, timeZoneId: String): ArrowType = {
  // All integral types become signed Arrow integers; only the bit width differs.
  def signedInt(bitWidth: Int): ArrowType = new ArrowType.Int(bitWidth, true)

  def floating(fp: FloatingPointPrecision): ArrowType = new ArrowType.FloatingPoint(fp)

  // Both timestamp flavours use microsecond resolution; `zone` is null for the NTZ variant.
  def microsTimestamp(zone: String): ArrowType =
    new ArrowType.Timestamp(TimeUnit.MICROSECOND, zone)

  dt match {
    case BooleanType => ArrowType.Bool.INSTANCE
    case ByteType => signedInt(8)
    case ShortType => signedInt(16)
    case IntegerType => signedInt(32)
    case LongType => signedInt(64)
    case FloatType => floating(FloatingPointPrecision.SINGLE)
    case DoubleType => floating(FloatingPointPrecision.DOUBLE)
    case StringType => ArrowType.Utf8.INSTANCE
    case BinaryType => ArrowType.Binary.INSTANCE
    // Decimals are always emitted with a 128-bit width.
    case DecimalType.Fixed(p, s) => new ArrowType.Decimal(p, s, 128)
    case DateType => new ArrowType.Date(DateUnit.DAY)
    // A zoned timestamp without a zone id cannot be represented, so reject it up front.
    case TimestampType if timeZoneId == null =>
      throw new UnsupportedOperationException(
        s"${TimestampType.catalogString} must supply timeZoneId parameter")
    case TimestampType => microsTimestamp(timeZoneId)
    case TimestampNTZType => microsTimestamp(null)
    case _ =>
      throw new UnsupportedOperationException(s"Unsupported data type: ${dt.catalogString}")
  }
}