in spark/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/Utils.scala [35:63]
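/**
 * Converts the raw HBase cell bytes for a field into the corresponding Scala/Catalyst value.
 * If the field carries an Avro schema, the bytes are deserialized via AvroSerdes and mapped
 * through the field's avroToCatalyst converter; otherwise the field's Spark SQL data type
 * drives a direct decode using HBase's Bytes utilities.
 */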
def hbaseFieldToScalaType(f: Field, src: Array[Byte], offset: Int, length: Int): Any = {
  if (f.exeSchema.isDefined) {
    // If an Avro schema is defined, use it to deserialize the record, then convert it to a Catalyst value
    val m = AvroSerdes.deserialize(src, f.exeSchema.get)
    val n = f.avroToCatalyst.map(_(m))
    n.get
  } else {
    // Fall back to atomic types, decoded with HBase's Bytes utilities
    f.dt match {
      case BooleanType => src(offset) != 0
      case ByteType => src(offset)
      case ShortType => Bytes.toShort(src, offset)
      case IntegerType => Bytes.toInt(src, offset)
      case LongType => Bytes.toLong(src, offset)
      case FloatType => Bytes.toFloat(src, offset)
      case DoubleType => Bytes.toDouble(src, offset)
      case DateType => new Date(Bytes.toLong(src, offset))
      case TimestampType => new Timestamp(Bytes.toLong(src, offset))
      case StringType => Bytes.toString(src, offset, length)
      case BinaryType =>
        val newArray = new Array[Byte](length)
        System.arraycopy(src, offset, newArray, 0, length)
        newArray
      case _: DecimalType => Bytes.toBigDecimal(src, offset, length)
      // TODO: SparkSqlSerializer.deserialize[Any](src)
      case _ => throw new Exception(s"unsupported data type ${f.dt}")
    }
  }
}
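
// A minimal standalone sketch (not part of the original file) of the Bytes round trip that the
// atomic branches above rely on. It assumes only org.apache.hadoop.hbase.util.Bytes from
// hbase-common is on the classpath: values written with Bytes.toBytes decode back through the
// same Bytes.toXxx(src, offset, ...) overloads used in hbaseFieldToScalaType.
object BytesRoundTripSketch {
  import org.apache.hadoop.hbase.util.Bytes

  def main(args: Array[String]): Unit = {
    val intCell: Array[Byte] = Bytes.toBytes(42)          // 4-byte big-endian encoding
    val strCell: Array[Byte] = Bytes.toBytes("row-value") // UTF-8 bytes

    // Decode from an offset/length window, mirroring the IntegerType and StringType arms.
    assert(Bytes.toInt(intCell, 0) == 42)
    assert(Bytes.toString(strCell, 0, strCell.length) == "row-value")
  }
}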