in mozetl/bhr_collection/bhr_collection.py [0:0]
def start_job(date, sample_size, use_gcs, thread_filter, output_tag, bq_connector_jar):
    """Set up Spark and run the daily BHR collection ETL for one day.

    A ``SparkSession`` named "bhr-collection" is obtained via
    ``getOrCreate()``; it and its ``sparkContext`` are stored in the
    module-level globals ``spark`` and ``sc`` before ``etl_job_daily`` is
    invoked.

    Args:
        date: Reference date; must support subtraction of a ``timedelta``.
            The job's start and end dates are both set to four days before
            this value, so exactly one day is requested.
        sample_size: Forwarded to the job config under ``"sample_size"``.
        use_gcs: Forwarded to the job config under ``"use_gcs"``.
        thread_filter: Forwarded to the job config under ``"thread_filter"``.
        output_tag: String appended to ``"hangs_"`` to form both the input
            and the output hang-profile file names.
        bq_connector_jar: When truthy, set as the ``spark.jars`` config of
            the session builder; otherwise no extra jar is configured.
    """
    global spark, sc

    print(f"Running for {date}")
    print(f"Using sample size {sample_size}")

    builder = SparkSession.builder.appName("bhr-collection")
    if bq_connector_jar:
        builder = builder.config("spark.jars", bq_connector_jar)

    # Publish the session and context as module globals before running.
    spark = builder.getOrCreate()
    sc = spark.sparkContext

    # Start and end are the same day: four days before the given date.
    target_day = date - timedelta(days=4)
    # The same profile name is used for both the input and output file.
    profile_name = "hangs_" + output_tag

    job_config = {
        "start_date": target_day,
        "end_date": target_day,
        "hang_profile_in_filename": profile_name,
        "hang_profile_out_filename": profile_name,
        "thread_filter": thread_filter,
        "hang_lower_bound": 128,
        "hang_upper_bound": 65536,
        "sample_size": sample_size,
        "use_gcs": use_gcs,
    }
    etl_job_daily(sc, spark, job_config)