def main()

in jobs/broken-site-report-ml/broken_site_report_ml/main.py [0:0]


def main(bq_project_id, bq_dataset_id):
    """Classify new and previously missed reports and store the results.

    Collects the reports filed since the last classification run together
    with the missed ones, deduplicates them, attaches translations where
    available, and sends them to the classifier in chunks. Classification
    results and a run record are written to BigQuery for every chunk.

    Args:
        bq_project_id: Project id used to create the BigQuery client.
        bq_dataset_id: Dataset id forwarded to every BigQuery helper.

    Returns:
        None. Returns early when there are no reports to classify.

    Raises:
        Exception: Any error raised while classifying or saving a chunk is
            logged, recorded as a failed run, and then re-raised unchanged.
    """
    client = bigquery.Client(project=bq_project_id)

    # Get datetime of the last classification run
    last_run_time = get_last_classification_datetime(client, bq_dataset_id)

    # Get reports that were filed since last classification run
    # and have non-empty descriptions as well as reports that were missed
    new_reports = get_reports_since_last_run(client, last_run_time, bq_dataset_id)
    missed_reports = get_missed_reports(client, last_run_time, bq_dataset_id)

    combined = missed_reports + new_reports

    deduplicated_combined = deduplicate_reports(combined)

    # NOTE(review): translation is attempted before the empty-input check
    # below; that ordering is kept because translate_reports' side effects
    # are not visible from here.
    translated = translate_reports(client, deduplicated_combined, bq_dataset_id)

    if translated:
        save_translations(client, bq_dataset_id, translated)

        # Merge translations only when there are any: a falsy result
        # (possibly None) must not be used in a membership test.
        for report in deduplicated_combined:
            if report["uuid"] in translated:
                report["translated_text"] = translated[report["uuid"]][
                    "translated_text"
                ]

    if not deduplicated_combined:
        logging.info(
            f"No new reports with filled descriptions were found since {last_run_time}"
        )
        return

    result_count = 0

    try:
        for chunk in chunk_list(deduplicated_combined, 20):
            # Prefer the translated text as the body when one was attached.
            objects_dict = {
                row["uuid"]: {
                    "uuid": row["uuid"],
                    "title": row["title"],
                    "body": (
                        row["translated_text"]
                        if row.get("translated_text")
                        else row["body"]
                    ),
                }
                for row in chunk
            }
            logging.info("Getting classification results from bugbug.")
            result = get_reports_classification(
                "invalidcompatibilityreport", objects_dict
            )

            # The classifier may hand back a falsy value (possibly None), so
            # the count is computed defensively instead of calling len() on
            # it unconditionally.
            classified_count = len(result) if result else 0

            if result:
                result_count += classified_count
                logging.info("Saving classification results to BQ.")
                add_classification_results(client, bq_dataset_id, result)

            # A run record is written for every chunk, including empty ones.
            record_classification_run(client, bq_dataset_id, True, classified_count)

    except Exception as e:
        logging.error(e)
        record_classification_run(client, bq_dataset_id, False, 0)
        raise

    finally:
        # Runs on success and on failure, so the total is always logged.
        logging.info(f"Total processed reports count: {result_count}")