in jobs/broken-site-report-ml/broken_site_report_ml/main.py [0:0]
def main(bq_project_id, bq_dataset_id):
    """Classify broken-site reports filed since the last run and store results in BigQuery.

    Pipeline: fetch reports filed since the last classification run (plus any
    previously missed ones), deduplicate, translate non-English descriptions,
    send the reports to bugbug in chunks of 20 for classification, persist the
    results, and record the outcome of the run — success per chunk, or a single
    failure record if anything raises.
    """
    client = bigquery.Client(project=bq_project_id)

    # Timestamp of the previous classification run; everything newer is "new".
    last_run_time = get_last_classification_datetime(client, bq_dataset_id)

    # Reports filed since the last run with non-empty descriptions, plus
    # reports earlier runs missed; duplicates between the two sets are dropped.
    new_reports = get_reports_since_last_run(client, last_run_time, bq_dataset_id)
    missed_reports = get_missed_reports(client, last_run_time, bq_dataset_id)
    reports = deduplicate_reports(missed_reports + new_reports)

    translations = translate_reports(client, reports, bq_dataset_id)
    if translations:
        save_translations(client, bq_dataset_id, translations)
        # Attach the translated description to each report that got one.
        for report in reports:
            uuid = report["uuid"]
            if uuid in translations:
                report["translated_text"] = translations[uuid]["translated_text"]

    if not reports:
        logging.info(
            f"No new reports with filled descriptions were found since {last_run_time}"
        )
        return

    result_count = 0
    try:
        for batch in chunk_list(reports, 20):
            # Build the bugbug payload keyed by report uuid, preferring the
            # translated description when one is available.
            payload = {}
            for row in batch:
                if row.get("translated_text"):
                    body = row["translated_text"]
                else:
                    body = row["body"]
                payload[row["uuid"]] = {
                    "uuid": row["uuid"],
                    "title": row["title"],
                    "body": body,
                }
            logging.info("Getting classification results from bugbug.")
            result = get_reports_classification(
                "invalidcompatibilityreport", payload
            )
            if result:
                result_count += len(result)
                logging.info("Saving classification results to BQ.")
                add_classification_results(client, bq_dataset_id, result)
                record_classification_run(client, bq_dataset_id, True, len(result))
    except Exception as e:
        # Record the failed run before letting the error propagate to the caller.
        logging.error(e)
        record_classification_run(client, bq_dataset_id, False, 0)
        raise
    finally:
        logging.info(f"Total processed reports count: {result_count}")