in mozetl/hardware_report/summarize_json.py [0:0]
def generate_report(start_date, end_date, spark):
"""Generate the hardware survey dataset for the reference timeframe.
If the timeframe is longer than a week, split it into weekly chunks
and process each chunk individually (eases backfilling).
The report for each week is saved in a local JSON file.
Args:
start_date: The date from which we start generating the report. If None,
the report starts from the beginning of the past week (Sunday).
end_date: The date that marks the end of the reporting period. This only
makes sense if a |start_date| was provided. If None, this defaults
to the end of the past week (Saturday).
"""
# If no start_date was provided, generate a report for the past complete
# week.
last_week = moz_std.get_last_week_range()
date_range = (
moz_std.snap_to_beginning_of_week(start_date, "Sunday")
if start_date is not None
else last_week[0],
end_date if (end_date is not None and start_date is not None) else last_week[1],
)
# Split the submission period in chunks, so we don't run out of resources while aggregating if
# we want to backfill.
chunk_start = date_range[0]
chunk_end = None
# Stores all hardware reports in json by date
date_to_json = {}
while chunk_start < date_range[1]:
chunk_end = chunk_start + dt.timedelta(days=6)
longitudinal_version = get_longitudinal_version(chunk_end)
sqlQuery = """