# src/ab/plugins/calculate/spark.py [0:0]
def get_or_create():
    """
    Return the process-local Spark session, creating it on first use.

    Once the Spark builder is initialized it holds an underlying
    SparkSession singleton named ``_instantiatedSession``, which defaults
    to ``None`` and is only set after ``getOrCreate()``. As long as this
    function is not called before fork, the returned session is
    process-isolated: thread-safe and process-safe.

    Raises:
        AttributeError: if the module-level ``spark_builder`` has not been
            initialized yet.
    """
    # check pid before potential errors
    _checkpid()
    if not spark_builder:
        raise AttributeError('spark not initialized')
    # NOTE: compare versions numerically, not lexicographically — a plain
    # string compare misorders multi-digit components ('2.10.0' < '2.2.0').
    if _version_tuple(get_spark_driver_version()) < (2, 2, 0):
        # workaround for https://issues.apache.org/jira/browse/SPARK-10872
        # step 1: create unique derby home dir for each process
        spark_builder.config('spark.driver.extraJavaOptions',
                             '-Dderby.system.home={derby_home}'.format(derby_home=get_derby_home()))
    import pyspark
    from ab import app
    if app.config.SAVE_SPARK_LOG:
        # TODO: got jammed while creating session on spark 2.4.0
        with hook_module(pyspark.java_gateway, 'Popen', HookedPopen):
            return get_or_create_inner()
    else:
        spark = get_or_create_inner()
        spark.sparkContext.setLogLevel('ERROR')
        return spark


def _version_tuple(version):
    """
    Parse a dotted version string like ``'2.4.0'`` into a comparable int tuple.

    Non-numeric suffixes in a component (e.g. ``'3.0.0-preview'``) are
    truncated at the first non-digit character; a fully non-numeric
    component counts as 0.
    """
    parts = []
    for component in version.split('.'):
        digits = ''
        for ch in component:
            if not ch.isdigit():
                break
            digits += ch
        parts.append(int(digits or 0))
    return tuple(parts)