project/paperbench/experiments/pbcd_correlation/plot.py (64 lines of code) (raw):
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
def load_and_prepare_data(filepath):
    """
    Load the results CSV and compute per-paper mean scores for both conditions.

    The CSV must provide the columns ``agent_id``, ``paper_id``,
    ``run_number`` and ``score``. Rows are split by ``agent_id`` into the
    normal condition (``aisi-basic-agent-iterative-o1``) and the code-only
    condition (``aisi-basic-agent-iterative-o1-pb-code-only``), and the mean
    score per ``paper_id`` is computed for each.

    Callers pair the two results element by element, so both Series are
    restricted to the papers present in *both* conditions and returned in the
    same ``paper_id`` order. Papers found in only one condition are dropped
    with a ``UserWarning``.

    Args:
        filepath (str): Path to the CSV file containing results

    Returns:
        tuple: (code_only_average, normal_average) - ``pandas.Series`` of mean
            scores indexed by ``paper_id``, sharing an identical index
    """
    import warnings

    # Load and sort data (sorting does not change the means; it only keeps
    # the row order deterministic and fails early if a column is missing)
    data = pd.read_csv(filepath)
    data = data.sort_values(["agent_id", "paper_id", "run_number"])

    # Split data by condition
    normal = data[data["agent_id"] == "aisi-basic-agent-iterative-o1"]
    code_only = data[data["agent_id"] == "aisi-basic-agent-iterative-o1-pb-code-only"]

    # Calculate averages by paper
    normal_average = normal.groupby("paper_id")["score"].mean()
    code_only_average = code_only.groupby("paper_id")["score"].mean()

    # Align on paper_id: without this, a paper missing from one condition
    # would shift every later value and pair scores of different papers.
    shared_papers = code_only_average.index.intersection(normal_average.index).sort_values()
    unmatched_papers = code_only_average.index.symmetric_difference(normal_average.index)
    if len(unmatched_papers) > 0:
        warnings.warn(
            "Dropping papers present in only one condition: "
            + ", ".join(str(paper) for paper in unmatched_papers),
            stacklevel=2,
        )

    return code_only_average.loc[shared_papers], normal_average.loc[shared_papers]
def perform_regression(code_only_scores, normal_scores):
    """
    Fit an ordinary least-squares line predicting normal scores from code-only scores.

    Args:
        code_only_scores (np.ndarray): Scores for code-only condition (predictor)
        normal_scores (np.ndarray): Scores for normal condition (target)

    Returns:
        tuple: (model, r_squared, slope, intercept) - the fitted
            ``LinearRegression`` followed by its R² on the fitted data,
            its single coefficient and its intercept
    """
    # scikit-learn expects a 2-D feature matrix, so make the scores one column.
    features = code_only_scores.reshape(-1, 1)
    regressor = LinearRegression().fit(features, normal_scores)
    return (
        regressor,
        regressor.score(features, normal_scores),
        regressor.coef_[0],
        regressor.intercept_,
    )
def plot_regression(
    code_only_scores,
    normal_scores,
    model,
    output_path="experiments/pbcd_correlation/correlation_plot.pdf",
    x_limits=(0.2, 0.7),
):
    """
    Create and save a scatter plot with regression line.

    The figure shows one point per (code-only, normal) score pair, the fitted
    line evaluated over ``x_limits``, and a text box with the line equation
    and R² value.

    Args:
        code_only_scores (np.ndarray): Scores for code-only condition (x axis)
        normal_scores (np.ndarray): Scores for normal condition (y axis)
        model (LinearRegression): Fitted regression model
        output_path (str): File the figure is written to. Defaults to the
            location used by the original script.
        x_limits (tuple): (start, end) of the x interval over which the
            regression line is drawn. Defaults to (0.2, 0.7).
    """
    x_start, x_end = x_limits
    x_range = np.linspace(x_start, x_end, 50)
    y_fit = model.predict(x_range.reshape(-1, 1))

    # Line of best fit equation and R² value shown in the annotation box
    r_squared = model.score(code_only_scores.reshape(-1, 1), normal_scores)
    equation = f"y = {model.coef_[0]:.3f}x + {model.intercept_:.3f}"
    r2_text = f"R² = {r_squared:.3f}"

    # rc_context keeps the small font local to this figure instead of
    # permanently altering matplotlib's global rcParams. Saving happens
    # inside the context because tick labels are only laid out at draw time.
    with plt.rc_context({"font.size": 7}):
        fig, ax = plt.subplots(figsize=(6.75133 / 1.5, 2.75))
        try:
            ax.scatter(code_only_scores, normal_scores, s=10)
            ax.plot(x_range, y_fit, "r-")
            ax.text(
                0.05,
                0.95,
                f"{equation}\n{r2_text}",
                transform=ax.transAxes,
                verticalalignment="top",
                bbox=dict(facecolor="white", alpha=0.8),
            )
            ax.set_xlabel("PaperBench Code-Dev Performance")
            ax.set_ylabel("PaperBench Performance")
            ax.grid(True, alpha=0.3)
            fig.tight_layout()
            fig.savefig(
                output_path,
                bbox_inches="tight",
                dpi=300,
                pad_inches=0.01,
            )
        finally:
            # Close this specific figure even when saving fails, so repeated
            # calls do not accumulate open figures.
            plt.close(fig)
def main():
    """Run the analysis: load scores, fit the regression, report it and save the plot."""
    csv_path = "experiments/pbcd_correlation/results.csv"
    code_only_average, normal_average = load_and_prepare_data(csv_path)

    # The regression and plotting helpers work on plain numpy arrays.
    x_scores = code_only_average.to_numpy()
    y_scores = normal_average.to_numpy()

    model, r_squared, slope, intercept = perform_regression(x_scores, y_scores)

    # Report the fit, one metric per line.
    print("Regression Results:")
    metrics = (
        ("R-squared", r_squared),
        ("Slope", slope),
        ("Intercept", intercept),
    )
    for label, value in metrics:
        print(f"{label}: {value:.4f}")

    plot_regression(x_scores, y_scores, model)
    print("\nPlot saved as 'experiments/pbcd_correlation/correlation_plot.pdf'")
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()