def process_comments()

in scripts/inline_comments_data_collection.py [0:0]


def _resolve_diff_id(diff_phid_to_id, diff_phid):
    """Return the numeric diff ID for ``diff_phid``.

    The local ``diff_phid_to_id`` mapping is consulted first; on a miss the
    diff is looked up through ``api.search_diffs``.

    Raises:
        NoDiffFoundForPHIDException: If the search returns no diff.
    """
    try:
        return diff_phid_to_id[diff_phid]
    except KeyError:
        diffs = api.search_diffs(diff_phid=diff_phid)
        if not diffs:
            raise NoDiffFoundForPHIDException(diff_phid)
        return diffs[0]["id"]


def process_comments(limit, diff_length_limit):
    """Yield one record per inline comment for which a follow-up diff exists.

    For every patch returned by ``review_data.get_all_inline_comments`` that
    has at least one comment marked as done, each comment of that patch is
    matched against the update returned by ``find_recent_update`` for the
    comment's modification date. The diff between the update's "old" and
    "new" patches is fetched and narrowed to the comment's file with
    ``extract_relevant_diff``.

    Args:
        limit: Number of patches (those with at least one done comment) after
            which iteration stops. The check happens after a patch has been
            processed, so at least one such patch is always processed.
        diff_length_limit: Diffs whose ``len()`` exceeds this value are
            skipped.

    Yields:
        dict: Keys ``bug_id``, ``revision_id``, ``revision_phid``,
        ``initial_patch_id``, ``fix_patch_id``, ``previous_patch_id``,
        ``comment`` (the comment's ``__dict__``) and ``fix_patch_diff``.

    Raises:
        KeyError: If a patch ID has no entry in the revisions map.
        NoDiffFoundForPHIDException: If a diff PHID can be resolved neither
            from the local map nor through the API.
    """
    patch_count = 0
    diff_id_to_revisions_map, diff_phid_to_id = load_revisions_maps()

    for patch_id, comments in review_data.get_all_inline_comments(lambda c: True):
        revision_info = diff_id_to_revisions_map[patch_id]
        transactions = revision_info["transactions"]

        # Patches without any resolved comment are ignored entirely and do
        # not count towards ``limit``.
        if not any(comment.is_done for comment in comments):
            continue

        # NOTE(review): every comment of the patch is processed here, not
        # only the resolved ones -- confirm whether this loop should be
        # restricted to comments with ``is_done`` set.
        for comment in comments:
            most_recent_update = find_recent_update(
                transactions, comment.date_modified
            )
            if not most_recent_update:
                continue

            update_fields = most_recent_update["fields"]
            fix_patch_id = _resolve_diff_id(diff_phid_to_id, update_fields["new"])

            # If the most recent patch is the original patch itself, skip it
            if fix_patch_id == patch_id:
                continue

            revision_phid = revision_info["phid"]
            revision_id = revision_info["id"]
            bug_id = revision_info["fields"]["bugzilla.bug-id"]

            previous_patch_id = _resolve_diff_id(
                diff_phid_to_id, update_fields["old"]
            )

            try:
                patch_diff = fetch_diff_from_url(
                    revision_id, previous_patch_id, fix_patch_id
                )
            except Exception as e:
                # Best effort: a diff that cannot be fetched only costs this
                # comment, not the whole collection run.
                logger.error("Failed to fetch diff: %s", e)
                continue

            if len(patch_diff) > diff_length_limit:
                continue

            relevant_diff = extract_relevant_diff(patch_diff, comment.filename)
            if not relevant_diff:
                continue

            yield {
                "bug_id": bug_id,
                "revision_id": revision_id,
                "revision_phid": revision_phid,
                "initial_patch_id": patch_id,
                "fix_patch_id": fix_patch_id,
                "previous_patch_id": previous_patch_id,
                "comment": comment.__dict__,
                "fix_patch_diff": relevant_diff,
            }

        patch_count += 1
        if patch_count >= limit:
            break