in webinars/snowflake_2021-09/finspace_spark.py [0:0]
def get_schema_from_spark(self, data_frame: pyspark.sql.dataframe.DataFrame):
    """Translate a Spark DataFrame's schema into a list of FinSpace column definitions."""
    # FinSpace column types, used when translating the Spark schema to FinSpace's schema:
    # 'STRING'|'CHAR'|'INTEGER'|'TINYINT'|'SMALLINT'|'BIGINT'|'FLOAT'|'DOUBLE'|'DATE'|'DATETIME'|'BOOLEAN'|'BINARY'
    DoubleType = "DOUBLE"
    FloatType = "FLOAT"
    DateType = "DATE"
    StringType = "STRING"
    IntegerType = "INTEGER"
    LongType = "BIGINT"
    BooleanType = "BOOLEAN"
    TimestampType = "DATETIME"
    hab_columns = []
    # Map each Spark data type (by class name) to its closest FinSpace type;
    # anything unmapped falls back to STRING.
    switcher = {
        "BinaryType": StringType,
        "BooleanType": BooleanType,
        "ByteType": IntegerType,
        "DateType": DateType,
        "DoubleType": DoubleType,
        "FloatType": FloatType,
        "IntegerType": IntegerType,
        "LongType": LongType,
        "NullType": StringType,
        "ShortType": IntegerType,
        "StringType": StringType,
        "TimestampType": TimestampType,
    }

    for field in data_frame.schema.fields:
        # print(f"name: {field.name} type: {field.dataType}")
        hab_type = switcher.get(type(field.dataType).__name__, StringType)
        hab_columns.append({
            "dataType": hab_type,
            "name": field.name,
            "description": ""
        })

    return hab_columns
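
# A minimal usage sketch (illustrative only, not part of the original webinar code):
# it shows how get_schema_from_spark might be called on a small DataFrame.
# The SparkSession setup, the sample data, and the `finspace` instance name
# are assumptions for illustration.
#
#   from pyspark.sql import SparkSession
#
#   spark = SparkSession.builder.getOrCreate()
#   df = spark.createDataFrame(
#       [(1, "AMZN", 3450.50)], ["trade_id", "ticker", "price"]
#   )
#   columns = finspace.get_schema_from_spark(df)
#   # columns ->
#   # [{'dataType': 'BIGINT', 'name': 'trade_id', 'description': ''},
#   #  {'dataType': 'STRING', 'name': 'ticker',   'description': ''},
#   #  {'dataType': 'DOUBLE', 'name': 'price',    'description': ''}]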