def start_job()

in mozetl/bhr_collection/bhr_collection.py [0:0]


def start_job(date, sample_size, use_gcs, thread_filter, output_tag, bq_connector_jar):
    print(f"Running for {date}")
    print(f"Using sample size {sample_size}")

    # Build the Spark session, attaching the BigQuery connector jar when one is supplied.
    spark_builder = SparkSession.builder.appName("bhr-collection")
    if bq_connector_jar:
        spark_builder = spark_builder.config("spark.jars", bq_connector_jar)
    # Expose the session and context as module-level globals used by the rest of the job.
    global spark
    global sc
    spark = spark_builder.getOrCreate()
    sc = spark.sparkContext

    # Run the daily ETL over a single day, four days before the supplied date.
    etl_job_daily(
        sc,
        spark,
        {
            "start_date": date - timedelta(days=4),
            "end_date": date - timedelta(days=4),
            "hang_profile_in_filename": "hangs_" + output_tag,
            "hang_profile_out_filename": "hangs_" + output_tag,
            "thread_filter": thread_filter,
            "hang_lower_bound": 128,
            "hang_upper_bound": 65536,
            "sample_size": sample_size,
            "use_gcs": use_gcs,
        },
    )
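
A minimal sketch of how this entry point might be driven from a small script. The argument values below (sample size, thread filter, output tag, connector jar) are illustrative placeholders and not taken from the source; only the import path follows the file location shown above.

from datetime import datetime

from mozetl.bhr_collection.bhr_collection import start_job

# Hypothetical driver; every argument value here is a placeholder.
if __name__ == "__main__":
    start_job(
        date=datetime.utcnow().date(),
        sample_size=0.5,
        use_gcs=True,
        thread_filter="Gecko",
        output_tag="main",
        bq_connector_jar=None,  # or a path/URI to a spark-bigquery connector jar
    )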