in scripts/figs.py [0:0]
def _apply_hatches(ax, hatches):
    """Give each bar container on *ax* the hatch at the same position in *hatches*."""
    for container, hatch in zip(ax.containers, hatches):
        for patch in container.patches:
            patch.set_hatch(hatch)


def _stack_totals(ax):
    """Yield ``(x_center, total_height)`` for each stacked bar currently on *ax*."""
    for rects in zip(*ax.containers):
        x_center = rects[0].get_x() + rects[0].get_width() / 2
        yield x_center, sum(rect.get_height() for rect in rects)


def draw_overall_new(front: bool = False,
                     job: bool = False,
                     pr: bool = False):
    """Plot the overall cost (and traffic) comparison, one PDF per value of ``c``.

    Reads the baseline log and the per-method result logs, runs each through
    ``overall_stats`` with a display label, concatenates the results, writes
    them to ``overall_stats_new.csv`` and then, for every distinct value in the
    ``c`` column, saves ``overall_comparison_c_{c}{suffix}.pdf``.

    At most one of the three flags may be set; with none set the full
    six-method comparison is drawn.

    Args:
        front: Draw the single-panel (cost only) figure comparing four methods;
            output suffix ``_front``.
        job: Compare the job-placement variants; output suffix ``_job``.
        pr: Compare replication-rate and job-sample-rate variants; output
            suffix ``_pr``.

    Raises:
        AssertionError: If more than one of ``front``, ``job`` and ``pr`` is
            truthy.

    Note:
        Assumes ``overall_stats(df, label)`` returns a frame whose ``tag``
        column holds ``label`` and which has the columns used below (``c``,
        ``egress_cost``, ``rep_cost``, ``network_cost``, ``ingress_volume``,
        ``egress_volume``, ``total_cost``, ``total_cost_std``) -- TODO confirm
        against its definition.
    """
    # The previous check (`not front or not pr or not job`) only rejected the
    # case where all three flags were set; any pair slipped through. Raised
    # explicitly (rather than via `assert`) so it still runs under `python -O`
    # while keeping the exception type callers may already expect.
    if sum(map(bool, (front, job, pr))) > 1:
        raise AssertionError("Only one of front, job, and pr can be True")

    moirai_log = '../sample_1.000_rep0.002/log.csv'
    yugong_log = '../yugong_results_rep0.000/log.csv'

    # header: tag, c, network_cost, egress_cost_Spark, egress_cost_Presto, rep_cost,
    # ingress_volume_Spark, ingress_volume_Presto, egress_volume_Spark, egress_volume_Presto
    baseline_df = pd.read_csv('../baselines_done/log.csv')

    def baseline(tag, label):
        # Rows of the shared baseline log that carry the given tag.
        return overall_stats(baseline_df[baseline_df['tag'] == tag], label)

    def from_log(path, label):
        # A method whose results live in their own log file.
        return overall_stats(pd.read_csv(path), label)

    # Each mode defines the frames to load (their order fixes the row order of
    # the CSV dump) and, separately, the left-to-right order of bars in the plot.
    if front:
        suffix = "_front"
        todo_dfs = [
            from_log(moirai_log, "Moirai\n(Our)"),
            from_log(yugong_log, "Yugong\n(Alibaba)"),
            baseline("No\nRep", "No Rep\n(Spotify)"),
            baseline("3M\n21%", "Rep 3Mon.\n(Twitter)"),
        ]
        tag_order = ["No Rep\n(Spotify)", "Rep 3Mon.\n(Twitter)", "Yugong\n(Alibaba)", "Moirai\n(Our)"]
    elif job:
        suffix = "_job"
        todo_dfs = [
            baseline("Volley\n2.5%", "Volley\nRepTop2.5%"),
            baseline("MoiJob\n0.2%", "Moi-\nJobDist"),
            from_log(moirai_log, "Moirai"),
        ]
        tag_order = ["Volley\nRepTop2.5%", "Moi-\nJobDist", "Moirai"]
    elif pr:
        suffix = "_pr"
        todo_dfs = [baseline("MoiJob\n0.2%", "Moi\nJobDist")]
        for rate in [0.001, 0.002, 0.004]:
            todo_dfs.append(from_log(f'../sample_1.000_rep{rate:.3f}/log.csv',
                                     f"Moi\nPR{rate * 100:.1f}%"))
        for sample_rate in [0.010, 0.050]:
            todo_dfs.append(from_log(f'../sample_{sample_rate:.3f}/log.csv',
                                     f"Moi\n{sample_rate * 100:.0f}%Job"))
        tag_order = ["Moi\nJobDist", "Moi\nPR0.1%", "Moi\nPR0.2%", "Moi\nPR0.4%", "Moi\n1%Job", "Moi\n5%Job"]
    else:
        suffix = ""
        todo_dfs = [
            from_log(moirai_log, "Moirai"),
            from_log(yugong_log, "Yugong"),
            baseline("Volley\n0%", "Volley"),
            baseline("RTD\n2.5%", "Rep\nTop2.5%"),
            baseline("No\nRep", "No\nRep"),
            baseline("3M\n21%", "Rep\n3Mon."),
        ]
        tag_order = ["No\nRep", "Volley", "Rep\n3Mon.", "Rep\nTop2.5%", "Yugong", "Moirai"]

    df = pd.concat(todo_dfs)
    df.to_csv('overall_stats_new.csv', index=False)

    is_default = not front and not pr and not job
    small_scale = pr or job  # these modes use the tighter y-axis ranges

    for c in df['c'].unique():
        # Re-index by tag and select in the fixed display order.
        df_c = df[df['c'] == c].set_index('tag').loc[tag_order]
        print(df_c)

        # Front mode has the cost panel only; otherwise traffic sits on the left.
        if front:
            fig, ax1 = plt.subplots(1, 1, figsize=(6, 4.5), constrained_layout=True)
        else:
            fig, (ax2, ax1) = plt.subplots(1, 2, figsize=(11, 4), constrained_layout=True)

        # ---- PLOT 1: Cost Breakdown ---- #
        df_costs = df_c[['egress_cost', 'rep_cost', 'network_cost']]
        # NOTE(review): the colour list is ordered replication, egress, network
        # while the columns are egress, replication, network -- confirm this
        # swap is intentional.
        df_costs.plot(kind='bar', stacked=True, ax=ax1, color=[
            replication_cost_color, egress_cost_color, network_cost_color])
        _apply_hatches(ax1, hatch_patterns[:len(df_costs.columns)])

        # Print the stack total above each bar. This must run before the error
        # bars are added, because errorbar() appends its own containers.
        for x_center, total_height in _stack_totals(ax1):
            if total_height > 0:
                if total_height < 1000000:
                    total_text = f'{total_height / 1000:.0f}K'
                else:
                    total_text = f'{total_height / 1000**2:.1f}M'
                ax1.text(x_center, total_height, total_text,
                         ha='center', va='bottom', fontsize=font_size - 2, color='black')

        if is_default:
            # NOTE(review): x positions come from this list's order, which
            # matches the first four bars of the default tag order; keep the
            # two in sync if either changes.
            for idx, tag in enumerate(["No\nRep", "Volley", "Rep\n3Mon.", "Rep\nTop2.5%", "Volley\nRepTop2.5%"]):
                if tag in df_costs.index:
                    total_cost = df_c.loc[tag, "total_cost"]
                    total_cost_std = df_c.loc[tag, "total_cost_std"]
                    ax1.errorbar(x=idx, y=total_cost, yerr=total_cost_std, color='black', capsize=5,
                                 label="Std Dev" if idx == 0 else "")

        ax1.set_ylabel("Weekly Cost ($)", fontsize=font_size)
        ax1.set_xlabel(None)
        ax1.tick_params(rotation=0, labelsize=font_size - 2)
        if front:
            ax1.set_xticklabels(df_costs.index, fontsize=font_size - 3, rotation=0)
        else:
            ax1.set_xticklabels(df_costs.index, fontsize=font_size - 5, rotation=15)

        if small_scale:
            yticks = [0, 20 * 1000, 40 * 1000, 60 * 1000, 80 * 1000, 100 * 1000, 120 * 1000]
            ytick_labels = ["0", "20K", "40K", "60K", "80K", "100K", "120K"]
        else:
            yticks = [0, 300 * 1000, 600 * 1000, 900 * 1000, 1200 * 1000, 1500 * 1000]
            ytick_labels = ["0", "300K", "600K", "900K", "1200K", "1500K"]
        ax1.set_yticks(yticks)
        ax1.set_yticklabels(ytick_labels, fontsize=font_size - 2)

        # Only the c == 30 figure (and the front figure) keeps a legend.
        if c == 30 or front:
            ax1.legend(["Egress", "Replication", "Network"], fontsize=font_size - 2, ncol=1)
        else:
            ax1.get_legend().remove()
        ax1.grid(axis='y')

        # ---- PLOT 2: Traffic Breakdown ---- #
        if not front:
            df_traffic = df_c[['ingress_volume', 'egress_volume']]
            df_traffic.plot(kind='bar', stacked=True, ax=ax2, color=[
                ingress_traffic_color, egress_traffic_color])
            # Traffic bars take the hatches left over after the cost columns.
            _apply_hatches(ax2, hatch_patterns[len(df_costs.columns):])

            # Totals above 1024 are shown in PB, the rest in TB.
            for x_center, total_height in _stack_totals(ax2):
                if total_height > 1024:
                    total_text = f'{total_height / 1024:.1f}PB'
                else:
                    total_text = f'{total_height:.0f}TB'
                ax2.text(x_center, total_height, total_text,
                         ha='center', va='bottom', fontsize=font_size - 6, color='black')

            ax2.set_ylabel("Weekly Traffic", fontsize=font_size)
            ax2.set_xlabel(None)
            ax2.tick_params(rotation=0, labelsize=font_size - 2)
            ax2.set_xticklabels(df_costs.index, fontsize=font_size - 5, rotation=15)

            if small_scale:
                yticks = [0, 2 * 1024, 4 * 1024, 6 * 1024, 8 * 1024]
                ytick_labels = ["0", "2PB", "4PB", "6PB", "8PB"]
            else:
                yticks = [0, 30 * 1024, 60 * 1024, 90 * 1024, 120 * 1024]
                ytick_labels = ["0", "30PB", "60PB", "90PB", "120PB"]
            ax2.set_yticks(yticks)
            ax2.set_yticklabels(ytick_labels, fontsize=font_size - 2)

            if c == 30:
                ax2.legend(["Ingress Volume", "Egress Volume"], fontsize=font_size - 3, ncol=1)
            else:
                ax2.get_legend().remove()
            ax2.grid(axis='y')

        # Save the figure
        fig.savefig(f'overall_comparison_c_{c}{suffix}.pdf')
        plt.close(fig)
        print(f"Saved overall_comparison_c_{c}{suffix}.pdf")