def main()

in scripts/code_review_tool_evaluator.py


def main(args):
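    """Evaluate code review tool variants against previously collected review feedback.

    For each selected review request, every tool variant generates comments on the
    patch, the comments are scored against the evaluation dataset, and the results
    are written to CSV files for side-by-side comparison.
    """
    # `review_data` resolves the Phabricator backend from the review-data registry.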
    review_platform = "phabricator"
    review_data: code_review.ReviewData = code_review.review_data_classes[
        review_platform
    ]()

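    # Each variant pairs a human-readable name with a tool instance built on the
    # LLM configured from the command-line arguments.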
    tool_variants = get_tool_variants(
        generative_model_tool.create_llm_from_args(args), args.variants
    )

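    # The evaluator holds previously evaluated comments and scores new comments
    # against them.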
    evaluator = FeedbackEvaluator(args.evaluation_dataset)

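    # Two outputs are produced: a cumulative comparison CSV (one wide row per
    # commented line, with one comment/evaluation column pair per variant) and a
    # timestamped CSV of raw evaluation rows for this run.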
    is_first_result = True
    result_file = os.path.join(
        args.results_dir,
        "code_review_tool_evaluator.csv",
    )
    evaluation_results_file = os.path.join(
        args.results_dir,
        f"evaluation_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv",
    )
    result_unique_columns = ["Review Request ID", "File", "Line", "Comment Number"]
    result_all_columns = result_unique_columns + [
        f"{title} ({variant_name})"
        for variant_name, _ in tool_variants
        for title in ("Comment", "Evaluation")
    ]
    evaluation_result_all_columns = [
        "variant_name",
        "revision_id",
        "diff_id",
        "new_comment",
        "old_comments_count",
        "matched",
        "old_comment",
        "evaluation",
    ]

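    # Select the review requests to process: explicit diff IDs, explicit review
    # request IDs, a random sample of revisions with previously CORRECT comments,
    # or the same set used by the latest evaluation results file.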
    selected_review_requests = []
    if args.diff_ids:
        selected_review_requests = (
            ("n/a", code_review.ReviewRequest(diff_id)) for diff_id in args.diff_ids
        )
    elif args.review_request_ids:
        selected_review_requests = (
            (review_request_id, review_data.get_review_request_by_id(review_request_id))
            for review_request_id in args.review_request_ids
        )
    elif args.evaluation_strategy == "random":
        print("No review request IDs specified. Selecting a random sample.")
        selected_review_requests = (
            (revision_id, code_review.ReviewRequest(diff_id))
            for revision_id, diff_id in evaluator.evaluated_comments.query(
                "evaluation == 'CORRECT'"
            )[["revision_id", "diff_id"]]
            .drop_duplicates()
            .sample(20)
            .itertuples(name=None, index=False)
        )
    elif args.evaluation_strategy == "same":
        selected_review_requests = (
            (revision_id, code_review.ReviewRequest(diff_id))
            for revision_id, diff_id in pd.read_csv(
                get_latest_evaluation_results_file(args.results_dir),
            )[["revision_id", "diff_id"]]
            .drop_duplicates()
            .itertuples(name=None, index=False)
        )
    else:
        raise ValueError(
            "Please specify either --diff-id or --revision-id. Alternatively, use "
            "--evaluation-strategy ('random' or 'same')."
        )

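    # Run every tool variant on each selected review request and collect both the
    # generated comments and their evaluations.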
    for review_request_id, review_request in selected_review_requests:
        print("---------------------------------------------------------")
        print(f"Review Request ID: {review_request_id}")
        print(f"Patch ID: {review_request.patch_id}")
        patch = review_data.get_patch_by_id(review_request.patch_id)
        print("---------------------------------------------------------")

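        # Skip very large patches up front (over 20,000 characters of raw diff),
        # presumably to keep prompts within model limits.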
        if len(patch.raw_diff) > 20_000:
            print("Skipping the patch because it is too large.")
            continue

        all_variants_results = []
        all_variants_evaluation_results = []
        for variant_name, tool in tool_variants:
            print(f"\n\nVariant: {variant_name}\n")
            try:
                comments = tool.run(patch)
            except code_review.FileNotInPatchError as e:
                print("Error while running the tool:", e)
                continue
            except code_review.LargeDiffError:
                print("Skipping the patch because it is too large.")
                continue

            print_prettified_comments(comments)
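            # Count comments per (request, file, line) so that multiple comments on
            # the same line get distinct "Comment Number" values.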
            comment_per_line_counter = defaultdict(int)

            evaluation = evaluator.evaluate_diff_comments(
                review_request.patch_id, comments
            )

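            # Evaluation rows are tagged with the variant and revision/diff they
            # came from; they are assumed to align one-to-one with `comments`
            # (indexed below).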
            all_variants_evaluation_results.extend(
                {
                    "variant_name": variant_name,
                    "revision_id": review_request_id,
                    "diff_id": review_request.patch_id,
                    **row,
                }
                for row in evaluation
            )

            for i, comment in enumerate(comments):
                key = (review_request_id, comment.filename, comment.end_line)
                comment_per_line_counter[key] += 1

                all_variants_results.append(
                    {
                        "Review Request ID": review_request_id,
                        "File": comment.filename,
                        "Line": comment.end_line,
                        "Comment Number": comment_per_line_counter[key],
                        f"Comment ({variant_name})": comment.content,
                        f"Evaluation ({variant_name})": evaluation[i].get("evaluation"),
                    }
                )

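        # groupby(...).first() merges rows from different variants that share the
        # same (request, file, line, comment number) key into a single wide row.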
        df = (
            pd.DataFrame(all_variants_results, columns=result_all_columns)
            .groupby(result_unique_columns)
            .first()
        )
        df.to_csv(
            result_file,
            header=is_first_result,
            mode="w" if is_first_result else "a",
        )

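        # Append the raw evaluation rows for this review request to the
        # timestamped file.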
        df = pd.DataFrame(
            all_variants_evaluation_results, columns=evaluation_result_all_columns
        )
        df.to_csv(
            evaluation_results_file,
            index=False,
            header=is_first_result,
            mode="w" if is_first_result else "a",
        )

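        # Headers are written only for the first processed request; later requests
        # are appended to the same files.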
        if is_first_result:
            is_first_result = False
            print("You can find the results in the file:", result_file)

        print("\n\n\n")