in spark-connector/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala [185:254]
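Maps a Java reflection Type (typically taken from a Hive UDF method signature) to the corresponding Catalyst DataType. The match covers Hadoop/Hive Writable wrappers, boxed and primitive Java types, arrays, and parameterized collections, and rejects signatures whose element types cannot be inferred.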
def javaTypeToDataType(clz: Type): DataType = clz match {
// Hadoop and Hive Writable wrapper types
case c: Class[_] if c == classOf[hadoopIo.DoubleWritable] => DoubleType
case c: Class[_] if c == classOf[hiveIo.DoubleWritable] => DoubleType
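// reflection gives us only the class, with no precision/scale, so decimals fall back to Spark's widest default, DecimalType(38, 18)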
case c: Class[_] if c == classOf[hiveIo.HiveDecimalWritable] => DecimalType.SYSTEM_DEFAULT
case c: Class[_] if c == classOf[hiveIo.ByteWritable] => ByteType
case c: Class[_] if c == classOf[hiveIo.ShortWritable] => ShortType
case c: Class[_] if c == classOf[hiveIo.DateWritable] => DateType
case c: Class[_] if c == classOf[hiveIo.TimestampWritable] => TimestampType
case c: Class[_] if c == classOf[hadoopIo.Text] => StringType
case c: Class[_] if c == classOf[hadoopIo.IntWritable] => IntegerType
case c: Class[_] if c == classOf[hadoopIo.LongWritable] => LongType
case c: Class[_] if c == classOf[hadoopIo.FloatWritable] => FloatType
case c: Class[_] if c == classOf[hadoopIo.BooleanWritable] => BooleanType
case c: Class[_] if c == classOf[hadoopIo.BytesWritable] => BinaryType
// Java object types: boxed primitives, strings, dates, and decimals
case c: Class[_] if c == classOf[java.lang.String] => StringType
case c: Class[_] if c == classOf[java.sql.Date] => DateType
case c: Class[_] if c == classOf[java.sql.Timestamp] => TimestampType
case c: Class[_] if c == classOf[HiveDecimal] => DecimalType.SYSTEM_DEFAULT
case c: Class[_] if c == classOf[java.math.BigDecimal] => DecimalType.SYSTEM_DEFAULT
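// note: byte[] matches here, ahead of the generic isArray case below, so it becomes BinaryType rather than ArrayType(ByteType)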
case c: Class[_] if c == classOf[Array[Byte]] => BinaryType
case c: Class[_] if c == classOf[java.lang.Short] => ShortType
case c: Class[_] if c == classOf[java.lang.Integer] => IntegerType
case c: Class[_] if c == classOf[java.lang.Long] => LongType
case c: Class[_] if c == classOf[java.lang.Double] => DoubleType
case c: Class[_] if c == classOf[java.lang.Byte] => ByteType
case c: Class[_] if c == classOf[java.lang.Float] => FloatType
case c: Class[_] if c == classOf[java.lang.Boolean] => BooleanType
// Java primitive types
case c: Class[_] if c == java.lang.Short.TYPE => ShortType
case c: Class[_] if c == java.lang.Integer.TYPE => IntegerType
case c: Class[_] if c == java.lang.Long.TYPE => LongType
case c: Class[_] if c == java.lang.Double.TYPE => DoubleType
case c: Class[_] if c == java.lang.Byte.TYPE => ByteType
case c: Class[_] if c == java.lang.Float.TYPE => FloatType
case c: Class[_] if c == java.lang.Boolean.TYPE => BooleanType
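// arrays recurse on the component type, e.g. Integer[] => ArrayType(IntegerType)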
case c: Class[_] if c.isArray => ArrayType(javaTypeToDataType(c.getComponentType))
// Hive appears to pass java.lang.Object for struct types; it carries no type information, so fall back to NullType
case c: Class[_] if c == classOf[java.lang.Object] => NullType
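// parameterized collections recurse into their type arguments; the resulting element/value nullability is the ArrayType/MapType default (true)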
case p: ParameterizedType if isSubClassOf(p.getRawType, classOf[java.util.List[_]]) =>
val Array(elementType) = p.getActualTypeArguments
ArrayType(javaTypeToDataType(elementType))
case p: ParameterizedType if isSubClassOf(p.getRawType, classOf[java.util.Map[_, _]]) =>
val Array(keyType, valueType) = p.getActualTypeArguments
MapType(javaTypeToDataType(keyType), javaTypeToDataType(valueType))
// raw (non-parameterized) java.util.List is unsupported
case c: Class[_] if isSubClassOf(c, classOf[java.util.List[_]]) =>
throw new AnalysisException(
"Raw list type in Java is unsupported because Spark cannot infer the element type.")
// raw (non-parameterized) java.util.Map is unsupported
case c: Class[_] if isSubClassOf(c, classOf[java.util.Map[_, _]]) =>
throw new AnalysisException(
"Raw map type in Java is unsupported because Spark cannot infer key and value types.")
case _: WildcardType =>
throw new AnalysisException(
"Collection types with wildcards (e.g. List<?> or Map<?, ?>) are unsupported because " +
"Spark cannot infer the data type for these type parameters.")
case c => throw new AnalysisException(s"Unsupported Java type $c")
}
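
A minimal usage sketch; the Sample class and JavaTypeMappingSketch object below are hypothetical, and javaTypeToDataType is assumed to be in scope, as it is for classes that mix in the HiveInspectors trait:

import org.apache.spark.sql.types.{ArrayType, DoubleType, MapType, StringType}

object JavaTypeMappingSketch {
  // hypothetical class whose generic method signatures we reflect over
  class Sample {
    def tags: java.util.List[String] = ???
    def scores: java.util.Map[String, java.lang.Double] = ???
  }

  def main(args: Array[String]): Unit = {
    // generic signatures survive erasure in the class file, so reflection
    // yields ParameterizedType instances that hit the List/Map cases above
    val listType = classOf[Sample].getMethod("tags").getGenericReturnType
    assert(javaTypeToDataType(listType) == ArrayType(StringType))

    val mapType = classOf[Sample].getMethod("scores").getGenericReturnType
    assert(javaTypeToDataType(mapType) == MapType(StringType, DoubleType))
  }
}

A raw java.util.List, raw java.util.Map, or wildcard signature such as List<?> would instead land in the AnalysisException branches above.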