in src/ab/plugins/data/engine.py [0:0]
def convert_to_spark_data_type(sample, table_info):
    """Build a Spark DataFrame from *sample* using the column types in *table_info*.

    Args:
        sample: row data accepted by ``createDataFrame`` (e.g. a pandas
            DataFrame or list of rows) — may be rewritten in place by the
            ``convert_date_type`` / ``convert_boolean_type`` helpers.
        table_info: dict with a ``'columns'`` list; each column dict carries
            ``'field'`` (column name) and ``'xlabType'`` (logical type name).

    Returns:
        A Spark DataFrame built with an explicit ``StructType`` schema.

    Raises:
        KeyError: if a column's ``xlabType`` is not one of the supported types.
    """
    from pyspark.sql.types import StructType, StructField, StringType, LongType, BooleanType, DoubleType, \
        TimestampType, DateType
    mapping = {
        'String': StringType(),
        # FIX: 'Long' previously mapped to IntegerType (32-bit), which cannot
        # represent 64-bit values; Spark's LongType is the correct counterpart.
        'Long': LongType(),
        'Double': DoubleType(),
        # Boolean and Date fields should be further analyzed
    }
    fields = []
    for column in table_info['columns']:
        cname = column['field']
        xt = column['xlabType']
        if xt == 'Date':
            # Helper inspects/converts the sample column and returns the
            # matching Spark type (presumably DateType or TimestampType —
            # NOTE(review): confirm against SparkEngine.convert_date_type).
            sample, spark_type = SparkEngine.convert_date_type(sample, cname)
        elif xt == 'Boolean':
            sample, spark_type = SparkEngine.convert_boolean_type(sample, cname)
        else:
            # Unknown xlabType deliberately raises KeyError (same as before).
            spark_type = mapping[xt]
        fields.append(StructField(cname, spark_type, True))
    return spark.get_or_create().createDataFrame(sample, schema=StructType(fields))