scripts/code_review_tool_evaluator.py
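# Imports this excerpt relies on. The exact module paths are an assumption
# inferred from the identifiers used below; get_tool_variants(),
# FeedbackEvaluator, print_prettified_comments(), and
# get_latest_evaluation_results_file() are defined elsewhere in this script.
import os
from collections import defaultdict
from datetime import datetime

import pandas as pd

from bugbug import generative_model_tool
from bugbug.tools import code_review
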
def main(args):
    review_platform = "phabricator"
    review_data: code_review.ReviewData = code_review.review_data_classes[
        review_platform
    ]()

    # Build the LLM-backed tool variants to compare, and the evaluator that
    # matches generated comments against the evaluation dataset.
    tool_variants = get_tool_variants(
        generative_model_tool.create_llm_from_args(args), args.variants
    )
    evaluator = FeedbackEvaluator(args.evaluation_dataset)
    # Both CSVs are written incrementally: the header is emitted only for the
    # first processed review request, and later rows are appended.
    is_first_result = True
    result_file = os.path.join(
        args.results_dir,
        "code_review_tool_evaluator.csv",
    )
    evaluation_results_file = os.path.join(
        args.results_dir,
        f"evaluation_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv",
    )
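
    # The comparison CSV is wide: one row per (review request, file, line,
    # comment number) key, with a "Comment (<variant>)" / "Evaluation (<variant>)"
    # column pair for each tool variant, e.g.:
    #   Review Request ID,File,Line,Comment Number,Comment (v1),Evaluation (v1),...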
    result_unique_columns = ["Review Request ID", "File", "Line", "Comment Number"]
    result_all_columns = result_unique_columns + [
        f"{title} ({variant_name})"
        for variant_name, _ in tool_variants
        for title in ("Comment", "Evaluation")
    ]

    evaluation_result_all_columns = [
        "variant_name",
        "revision_id",
        "diff_id",
        "new_comment",
        "old_comments_count",
        "matched",
        "old_comment",
        "evaluation",
    ]
    # Select the review requests to run on: explicit diff IDs, explicit review
    # request IDs, a random sample of previously evaluated requests, or the
    # same set as the latest evaluation results file.
    selected_review_requests = []
    if args.diff_ids:
        selected_review_requests = (
            ("n/a", code_review.ReviewRequest(diff_id)) for diff_id in args.diff_ids
        )
    elif args.review_request_ids:
        selected_review_requests = (
            (review_request_id, review_data.get_review_request_by_id(review_request_id))
            for review_request_id in args.review_request_ids
        )
    elif args.evaluation_strategy == "random":
        print("No review request IDs specified. Selecting a random sample.")
        # Sample 20 distinct (revision, diff) pairs from comments previously
        # evaluated as correct.
        selected_review_requests = (
            (revision_id, code_review.ReviewRequest(diff_id))
            for revision_id, diff_id in evaluator.evaluated_comments.query(
                "evaluation == 'CORRECT'"
            )[["revision_id", "diff_id"]]
            .drop_duplicates()
            .sample(20)
            .itertuples(index=False)
        )
    elif args.evaluation_strategy == "same":
        # Reuse exactly the (revision, diff) pairs from the most recent run.
        selected_review_requests = (
            (revision_id, code_review.ReviewRequest(diff_id))
            for revision_id, diff_id in pd.read_csv(
                get_latest_evaluation_results_file(args.results_dir),
            )[["revision_id", "diff_id"]]
            .drop_duplicates()
            .itertuples(name=None, index=False)
        )
    else:
        raise ValueError(
            "Please specify either --diff-id or --revision-id. Alternatively, use --evaluation-strategy."
        )
    for review_request_id, review_request in selected_review_requests:
        print("---------------------------------------------------------")
        print(f"Review Request ID: {review_request_id}")
        print(f"Patch ID: {review_request.patch_id}")
        patch = review_data.get_patch_by_id(review_request.patch_id)
        print("---------------------------------------------------------")

        # Skip patches whose raw diff exceeds 20,000 characters.
        if len(patch.raw_diff) > 20_000:
            print("Skipping the patch because it is too large.")
            continue
        all_variants_results = []
        all_variants_evaluation_results = []
        for variant_name, tool in tool_variants:
            print(f"\n\nVariant: {variant_name}\n")
            try:
                comments = tool.run(patch)
            except code_review.FileNotInPatchError as e:
                print("Error while running the tool:", e)
                continue
            except code_review.LargeDiffError:
                print("Skipping the patch because it is too large.")
                continue

            print_prettified_comments(comments)

            # `evaluation` holds one dict per generated comment, in the same
            # order as `comments`.
            comment_per_line_counter = defaultdict(int)
            evaluation = evaluator.evaluate_diff_comments(
                review_request.patch_id, comments
            )
            all_variants_evaluation_results.extend(
                {
                    "variant_name": variant_name,
                    "revision_id": review_request_id,
                    "diff_id": review_request.patch_id,
                    **row,
                }
                for row in evaluation
            )

            # Number comments that land on the same file/line so each one gets
            # a unique key in the comparison table.
            for i, comment in enumerate(comments):
                key = (review_request_id, comment.filename, comment.end_line)
                comment_per_line_counter[key] += 1
                all_variants_results.append(
                    {
                        "Review Request ID": review_request_id,
                        "File": comment.filename,
                        "Line": comment.end_line,
                        "Comment Number": comment_per_line_counter[key],
                        f"Comment ({variant_name})": comment.content,
                        f"Evaluation ({variant_name})": evaluation[i].get("evaluation"),
                    }
                )
        # Collapse the per-variant rows that share the same key into one wide
        # row, then append to the comparison CSV.
        df = (
            pd.DataFrame(all_variants_results, columns=result_all_columns)
            .groupby(result_unique_columns)
            .first()
        )
        df.to_csv(
            result_file,
            header=is_first_result,
            mode="w" if is_first_result else "a",
        )

        df = pd.DataFrame(
            all_variants_evaluation_results, columns=evaluation_result_all_columns
        )
        df.to_csv(
            evaluation_results_file,
            index=False,
            header=is_first_result,
            mode="w" if is_first_result else "a",
        )

        if is_first_result:
            is_first_result = False

        print("You can find the results in the file:", result_file)
        print("\n\n\n")
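

# A minimal sketch of how main() might be wired up, assuming an argparse-based
# CLI. The flag names below are hypothetical, inferred from the attributes
# main() reads (args.variants, args.evaluation_dataset, args.results_dir,
# args.diff_ids, args.review_request_ids, args.evaluation_strategy); the real
# script may define them differently, and create_llm_from_args() will also
# expect its own model-selection arguments.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Compare code review tool variants.")
    parser.add_argument("--variants", nargs="+", help="Names of tool variants to run.")
    parser.add_argument("--evaluation-dataset", required=True)
    parser.add_argument("--results-dir", default=".")
    parser.add_argument("--diff-id", dest="diff_ids", type=int, action="append")
    parser.add_argument("--review-request-id", dest="review_request_ids", action="append")
    parser.add_argument("--evaluation-strategy", choices=["random", "same"])
    main(parser.parse_args())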