in spark/spark-3.4/src/main/scala/org/apache/sedona/sql/datasources/shapefile/ShapefileUtils.scala [150:201]
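/**
 * Builds the converter that decodes the raw bytes of a single DBF field into a Spark SQL
 * value, dispatching on the field descriptor's one-character type code. `cpg` carries the
 * character encoding declared by the shapefile's optional .cpg sidecar, if one was found.
 */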
def fieldValueConverter(desc: FieldDescriptor, cpg: Option[String]): Array[Byte] => Any = {
  desc.getFieldType match {
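    // 'C' (character): decode using the .cpg-declared encoding, defaulting to ISO-8859-1,
    // and trim the trailing spaces that pad DBF fields to their fixed width.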
    case 'C' =>
      val encoding = cpg.getOrElse("ISO-8859-1")
      if (encoding.toLowerCase(Locale.ROOT) == "utf-8") {
        (bytes: Array[Byte]) => UTF8String.fromBytes(bytes).trimRight()
      } else {
        (bytes: Array[Byte]) => UTF8String.fromString(new String(bytes, encoding)).trimRight()
      }
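    // 'N' (numeric) / 'F' (float): fields with no decimal places become longs, everything
    // else becomes a Decimal; unparsable values map to SQL NULL.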
    case 'N' | 'F' =>
      val scale = desc.getFieldDecimalCount
      if (scale == 0) {
        (bytes: Array[Byte]) =>
          try {
            new String(bytes, StandardCharsets.ISO_8859_1).trim.toLong
          } catch {
            case _: Exception => null
          }
      } else {
        (bytes: Array[Byte]) =>
          try {
            Decimal.fromString(UTF8String.fromBytes(bytes))
          } catch {
            case _: Exception => null
          }
      }
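    // 'L' (logical): a single byte encodes the boolean; any unrecognized byte (e.g. '?',
    // the DBF marker for an uninitialized logical) yields NULL.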
    case 'L' =>
      (bytes: Array[Byte]) =>
        if (bytes.isEmpty) null
        else {
          bytes.head match {
            case 'T' | 't' | 'Y' | 'y' => true
            case 'F' | 'f' | 'N' | 'n' => false
            case _ => null
          }
        }
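    // 'D' (date): eight ASCII digits in yyyyMMdd order, converted to the day count since
    // the Unix epoch that Spark's DateType stores internally.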
    case 'D' =>
      (bytes: Array[Byte]) => {
        try {
          val dateString = new String(bytes, StandardCharsets.ISO_8859_1)
          val formatter = DateTimeFormatter.BASIC_ISO_DATE
          val date = LocalDate.parse(dateString, formatter)
          date.toEpochDay.toInt
        } catch {
          case _: Exception => null
        }
      }
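    // Any other DBF type code (e.g. 'M' for memo) is not supported.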
    case _ =>
      throw new IllegalArgumentException(s"Unsupported field type ${desc.getFieldType}")
  }
}
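
// A standalone sketch of the 'D' branch, for illustration only: the object and method
// names (DbfDateSketch, parseDbfDate) are hypothetical, and nothing beyond java.nio and
// java.time is assumed. It shows how an eight-byte yyyyMMdd field becomes the epoch-day
// Int that Spark's DateType stores, with blank or malformed input mapped to null.
import java.nio.charset.StandardCharsets
import java.time.LocalDate
import java.time.format.DateTimeFormatter

object DbfDateSketch {
  // Mirrors the 'D' branch above: parse yyyyMMdd bytes, emit an epoch-day Int.
  def parseDbfDate(bytes: Array[Byte]): Any = {
    try {
      val dateString = new String(bytes, StandardCharsets.ISO_8859_1)
      LocalDate.parse(dateString, DateTimeFormatter.BASIC_ISO_DATE).toEpochDay.toInt
    } catch {
      case _: Exception => null // blank or malformed dates become SQL NULL
    }
  }

  def main(args: Array[String]): Unit = {
    println(parseDbfDate("20240229".getBytes(StandardCharsets.ISO_8859_1))) // prints 19782
    println(parseDbfDate("        ".getBytes(StandardCharsets.ISO_8859_1))) // prints null
  }
}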