in src/ab/plugins/calculate/spark.py
import glob
import logging
import os

# Assumption: the surrounding module wires up logging elsewhere; a stdlib
# module-level logger is a reasonable stand-in here.
logger = logging.getLogger(__name__)

# Populated by init_spark_builder(); stays None until Spark is configured.
spark_builder = None


def init_spark_builder(config):
if not config.SPARK:
logger.info('no config.SPARK found, spark uninitialized')
return
try:
import pyspark
        logger.info('spark driver version: %s', get_spark_driver_version())
except ImportError:
logger.warning('pyspark not found, spark uninitialized')
return
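    # Jars are staged under the plugin package directory (here, src/ab/plugins).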
root = os.path.join(os.path.dirname(__file__), os.path.pardir)
    if config.get('TESTING'):
        # Under test, expose the jars from the working directory inside the
        # package tree; the symlink may already exist, so ignore OS errors.
        try:
            os.symlink(os.path.join(os.getcwd(), 'spark_jars'), os.path.join(root, 'spark_jars'))
        except OSError:
            pass
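    # Hand every bundled jar to Spark, appending to any spark.jars value the
    # caller already configured.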
spark_jars_dir = os.path.join(root, 'spark_jars', '*.jar')
spark_jars = glob.glob(spark_jars_dir)
spark_jars_str = ','.join(spark_jars)
    # Skip the merge when no jars were found, to avoid a trailing comma.
    if spark_jars_str:
        if 'spark.jars' in config.SPARK:
            config.SPARK['spark.jars'] += ',' + spark_jars_str
        else:
            config.SPARK['spark.jars'] = spark_jars_str
global spark_builder
spark_builder = pyspark.sql.SparkSession.builder.appName(config.APP_NAME)
    # enableHiveSupport() is shorthand for
    # config("spark.sql.catalogImplementation", "hive"); Builder methods
    # mutate the builder in place, so the return value can be ignored.
    spark_builder.enableHiveSupport()
    for k, v in config.SPARK.items():
        spark_builder.config(k, v)
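

# A minimal usage sketch, not part of the original file: the accessor name
# `get_spark_session` is an assumption, but Builder.getOrCreate() is the
# standard pyspark call for materializing a session from a builder.
def get_spark_session():
    """Return a SparkSession from the configured builder, or None if
    init_spark_builder() was skipped (no config.SPARK, or no pyspark)."""
    if spark_builder is None:
        return None
    return spark_builder.getOrCreate()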