def get_schema_from_spark()

in notebooks/Utilities/finspace_spark.py [0:0]


    def get_schema_from_spark(self, data_frame: "pyspark.sql.dataframe.DataFrame"):
        """Translate a Spark DataFrame's schema into a FinSpace column list.

        Only each schema field's ``name`` and ``str(dataType)`` are read;
        no Spark computation is triggered.

        Parameters
        ----------
        data_frame : pyspark.sql.dataframe.DataFrame
            DataFrame whose schema is to be converted.

        Returns
        -------
        list of dict
            One dict per column with keys ``dataType`` (FinSpace type name),
            ``name``, and ``description`` (always empty here).
        """
        # FinSpace column types:
        # 'STRING'|'CHAR'|'INTEGER'|'TINYINT'|'SMALLINT'|'BIGINT'|'FLOAT'|'DOUBLE'|'DATE'|'DATETIME'|'BOOLEAN'|'BINARY'
        DoubleType    = "DOUBLE"
        FloatType     = "FLOAT"
        DateType      = "DATE"
        StringType    = "STRING"
        IntegerType   = "INTEGER"
        LongType      = "BIGINT"
        BooleanType   = "BOOLEAN"
        TimestampType = "DATETIME"

        # Map str(spark field dataType) -> FinSpace type name.
        # Unknown/unlisted Spark types deliberately fall back to STRING.
        switcher = {
            "BinaryType"    : StringType,
            "BooleanType"   : BooleanType,
            "ByteType"      : IntegerType,    # widening: INTEGER holds any byte value
            "DateType"      : DateType,
            "DoubleType"    : DoubleType,     # was FLOAT — would have lost precision
            "FloatType"     : FloatType,      # was missing — floats fell back to STRING
            "IntegerType"   : IntegerType,
            "LongType"      : LongType,       # was INTEGER — 64-bit values would overflow
            "NullType"      : StringType,
            "ShortType"     : IntegerType,    # widening: INTEGER holds any short value
            "StringType"    : StringType,
            "TimestampType" : TimestampType,
        }

        return [
            {
                "dataType"    : switcher.get(str(field.dataType), StringType),
                "name"        : field.name,
                "description" : "",
            }
            for field in data_frame.schema
        ]