in spark-connector/common/src/main/scala/org/apache/spark/sql/odps/ArrowUtils.scala [57:87]
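/**
 * Maps an Arrow field type to the corresponding Spark SQL DataType. Covers
 * the primitive, decimal, date/datetime, and timestamp encodings that ODPS
 * reads produce, plus the ODPS timestamp extension type; any other Arrow
 * type is rejected with an UnsupportedOperationException.
 */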
def fromArrowType(dt: ArrowType): DataType = dt match {
case ArrowType.Bool.INSTANCE => BooleanType
case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 => ByteType
case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 * 2 => ShortType
case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 * 4 => IntegerType
case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 * 8 => LongType
case float: ArrowType.FloatingPoint
  if float.getPrecision == FloatingPointPrecision.SINGLE => FloatType
case float: ArrowType.FloatingPoint
  if float.getPrecision == FloatingPointPrecision.DOUBLE => DoubleType
case ArrowType.Utf8.INSTANCE => StringType
case ArrowType.Binary.INSTANCE => BinaryType
case d: ArrowType.Decimal =>
  if (d.getPrecision == 54 && d.getScale == 18) {
    // Arrow decimal(54, 18) exceeds Spark's maximum precision of 38,
    // so map it to the ODPS default decimal(38, 18).
    DecimalType(OdpsUtils.ODPS_DECIMAL_DEFAULT_PRECISION, OdpsUtils.ODPS_DECIMAL_DEFAULT_SCALE)
  } else {
    DecimalType(d.getPrecision, d.getScale)
  }
case date: ArrowType.Date if date.getUnit == DateUnit.DAY => DateType
/** Tunnel DATETIME is transferred as Arrow Date(MILLISECOND). */
case datetime: ArrowType.Date if datetime.getUnit == DateUnit.MILLISECOND => TimestampType
case ts: ArrowType.Timestamp if ts.getUnit == TimeUnit.MICROSECOND => TimestampType
case tsNano: ArrowType.Timestamp if tsNano.getUnit == TimeUnit.NANOSECOND => TimestampType
/** ODPS DATETIME is transferred as Arrow Timestamp(MILLISECOND). */
case tsMilli: ArrowType.Timestamp if tsMilli.getUnit == TimeUnit.MILLISECOND => TimestampType
/** The ODPS TIMESTAMP extension type also maps to Spark's TimestampType. */
case _: OdpsTimestampType => TimestampType
/** TODO: handle the ODPS legacy decimal extension type. */
case _ => throw new UnsupportedOperationException(s"Unsupported data type: $dt")
}
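
A minimal usage sketch of the mapping (not part of the connector source): it assumes Arrow's Java POJO types and Spark's type API are on the classpath and that fromArrowType is in scope as defined above. On recent Arrow releases the two-argument ArrowType.Decimal constructor is deprecated; the three-argument form with an explicit 128-bit width may be needed there.

import org.apache.arrow.vector.types.DateUnit
import org.apache.arrow.vector.types.pojo.ArrowType
import org.apache.spark.sql.types.{DecimalType, IntegerType, TimestampType}

// A signed 32-bit Arrow integer maps to Spark's IntegerType.
assert(fromArrowType(new ArrowType.Int(32, true)) == IntegerType)
// Tunnel DATETIME arrives as Arrow Date(MILLISECOND) and maps to TimestampType.
assert(fromArrowType(new ArrowType.Date(DateUnit.MILLISECOND)) == TimestampType)
// decimal(54, 18) exceeds Spark's 38-digit maximum and collapses to the default decimal(38, 18).
assert(fromArrowType(new ArrowType.Decimal(54, 18)) == DecimalType(38, 18))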