def draw_overall

def draw_overall_new()

in scripts/figs.py [0:0]
127 lines of code
40 McCabe index (conditional complexity)

def draw_overall_new(front: bool = False,
                     job: bool = False,
                     pr: bool = False) -> None:
    """Plot the overall cost/traffic comparison, one PDF per distinct ``c`` value.

    The per-system statistics are read from ``log.csv`` files under ``../``,
    passed through ``overall_stats`` together with a display label,
    concatenated, dumped to ``overall_stats_new.csv`` and then plotted.
    For every distinct value in the ``c`` column a figure is written to
    ``overall_comparison_c_{c}{suffix}.pdf`` in the current directory.

    At most one of the mode flags may be set; with none set the full
    six-system comparison is drawn.

    Args:
        front: Draw the four-system figure (file suffix ``_front``). Only the
            cost panel is drawn in this mode.
        job: Draw the three-system job figure (file suffix ``_job``).
        pr: Draw the replication-rate / job-sample-rate figure
            (file suffix ``_pr``).

    Raises:
        AssertionError: If more than one of ``front``, ``job`` and ``pr``
            is truthy.
    """
    # Raised explicitly (rather than via ``assert``) so the check is not
    # stripped under ``python -O``; AssertionError is kept so the exception
    # type seen by existing callers does not change.
    if sum(map(bool, (front, job, pr))) > 1:
        raise AssertionError("Only one of front, job, and pr can be True")

    # header: tag, c, network_cost, egress_cost_Spark, egress_cost_Presto, rep_cost,
    # ingress_volume_Spark, ingress_volume_Presto, egress_volume_Spark, egress_volume_Presto
    baseline_df = pd.read_csv('../baselines_done/log.csv')

    def baseline_rows(tag):
        """Return the rows of the baseline log whose ``tag`` equals *tag*."""
        return baseline_df[baseline_df['tag'] == tag]

    # Each mode selects its inputs, the output-file suffix and the left-to-right
    # bar order. The labels passed to overall_stats must match the entries of
    # tag_order, since the rows are later selected by those labels.
    todo_dfs = []
    if front:
        suffix = "_front"
        tag_order = ["No Rep\n(Spotify)", "Rep 3Mon.\n(Twitter)", "Yugong\n(Alibaba)", "Moirai\n(Our)"]
        todo_dfs.append(overall_stats(pd.read_csv('../sample_1.000_rep0.002/log.csv'), "Moirai\n(Our)"))
        todo_dfs.append(overall_stats(pd.read_csv('../yugong_results_rep0.000/log.csv'), "Yugong\n(Alibaba)"))
        todo_dfs.append(overall_stats(baseline_rows("No\nRep"), "No Rep\n(Spotify)"))
        todo_dfs.append(overall_stats(baseline_rows("3M\n21%"), "Rep 3Mon.\n(Twitter)"))
    elif job:
        suffix = "_job"
        tag_order = ["Volley\nRepTop2.5%", "Moi-\nJobDist", "Moirai"]
        todo_dfs.append(overall_stats(baseline_rows("Volley\n2.5%"), "Volley\nRepTop2.5%"))
        todo_dfs.append(overall_stats(baseline_rows("MoiJob\n0.2%"), "Moi-\nJobDist"))
        todo_dfs.append(overall_stats(pd.read_csv('../sample_1.000_rep0.002/log.csv'), "Moirai"))
    elif pr:
        suffix = "_pr"
        tag_order = ["Moi\nJobDist", "Moi\nPR0.1%", "Moi\nPR0.2%", "Moi\nPR0.4%", "Moi\n1%Job", "Moi\n5%Job"]
        todo_dfs.append(overall_stats(baseline_rows("MoiJob\n0.2%"), "Moi\nJobDist"))
        for rate in [0.001, 0.002, 0.004]:
            rate_df = pd.read_csv(f'../sample_1.000_rep{rate:.3f}/log.csv')
            todo_dfs.append(overall_stats(rate_df, f"Moi\nPR{rate * 100:.1f}%"))
        for sample_rate in [0.010, 0.050]:
            sample_df = pd.read_csv(f'../sample_{sample_rate:.3f}/log.csv')
            todo_dfs.append(overall_stats(sample_df, f"Moi\n{sample_rate * 100:.0f}%Job"))
    else:
        suffix = ""
        tag_order = ["No\nRep", "Volley", "Rep\n3Mon.", "Rep\nTop2.5%", "Yugong", "Moirai"]
        todo_dfs.append(overall_stats(pd.read_csv('../sample_1.000_rep0.002/log.csv'), "Moirai"))
        todo_dfs.append(overall_stats(pd.read_csv('../yugong_results_rep0.000/log.csv'), "Yugong"))
        todo_dfs.append(overall_stats(baseline_rows("Volley\n0%"), "Volley"))
        todo_dfs.append(overall_stats(baseline_rows("RTD\n2.5%"), "Rep\nTop2.5%"))
        todo_dfs.append(overall_stats(baseline_rows("No\nRep"), "No\nRep"))
        todo_dfs.append(overall_stats(baseline_rows("3M\n21%"), "Rep\n3Mon."))

    df = pd.concat(todo_dfs)
    df.to_csv('overall_stats_new.csv', index=False)

    default_mode = not (front or job or pr)
    small_scale = pr or job  # these two modes use the smaller y-axis ranges

    for c in df['c'].unique():
        # Select this c's rows and force the bar order; .loc raises KeyError
        # if any expected tag is missing from the data.
        df_c = df[df['c'] == c].set_index('tag').loc[tag_order]
        print(df_c)

        # Front mode has a single (cost) panel; every other mode puts the
        # traffic panel on the left and the cost panel on the right.
        if front:
            fig, ax1 = plt.subplots(1, 1, figsize=(6, 4.5), constrained_layout=True)
            ax2 = None
        else:
            fig, (ax2, ax1) = plt.subplots(1, 2, figsize=(11, 4), constrained_layout=True)

        # ---- PLOT 1: Cost Breakdown ---- #
        df_costs = df_c[['egress_cost', 'rep_cost', 'network_cost']]
        # Colours are listed in the same order as the columns above so that
        # each cost component is drawn in its own colour.
        df_costs.plot(kind='bar', stacked=True, ax=ax1, color=[
            egress_cost_color, replication_cost_color, network_cost_color])

        # Apply hatch patterns to the bars (one pattern per stacked component)
        for bar, hatch in zip(ax1.containers, hatch_patterns[:len(df_costs.columns)]):
            for patch in bar.patches:
                patch.set_hatch(hatch)

        # Add total sum as a single number on top of each bar. This runs
        # before any error bars are added, so ax1.containers holds only the
        # bar containers here.
        for rects in zip(*ax1.containers):
            total_height = sum(rect.get_height() for rect in rects)
            if total_height > 0:
                if total_height < 1000000:
                    total_label = f'{total_height / 1000:.0f}K'
                else:
                    total_label = f'{total_height / 1000**2:.1f}M'
                ax1.text(rects[0].get_x() + rects[0].get_width() / 2, total_height,
                         total_label,
                         ha='center', va='bottom', fontsize=font_size - 2, color='black')

        if default_mode:
            # Error bars are placed at the bar's actual x position (its row
            # position in df_c), not at its position in the tag list below.
            bar_positions = {tag: pos for pos, tag in enumerate(df_c.index)}
            for idx, tag in enumerate(["No\nRep", "Volley", "Rep\n3Mon.", "Rep\nTop2.5%", "Volley\nRepTop2.5%"]):
                if tag in bar_positions:
                    total_cost = df_c.loc[tag, "total_cost"]
                    total_cost_std = df_c.loc[tag, "total_cost_std"]
                    ax1.errorbar(x=bar_positions[tag], y=total_cost, yerr=total_cost_std,
                                 color='black', capsize=5,
                                 label="Std Dev" if idx == 0 else "")

        ax1.set_ylabel("Weekly Cost ($)", fontsize=font_size)
        ax1.set_xlabel(None)
        ax1.tick_params(rotation=0, labelsize=font_size - 2)
        if front:
            ax1.set_xticklabels(df_costs.index, fontsize=font_size - 3, rotation=0)
        else:
            ax1.set_xticklabels(df_costs.index, fontsize=font_size - 5, rotation=15)

        if small_scale:
            cost_ticks = [0, 20 * 1000, 40 * 1000, 60 * 1000, 80 * 1000, 100 * 1000, 120 * 1000]
            cost_tick_labels = ["0", "20K", "40K", "60K", "80K", "100K", "120K"]
        else:
            cost_ticks = [0, 300 * 1000, 600 * 1000, 900 * 1000, 1200 * 1000, 1500 * 1000]
            cost_tick_labels = ["0", "300K", "600K", "900K", "1200K", "1500K"]

        ax1.set_yticks(cost_ticks)
        ax1.set_yticklabels(cost_tick_labels, fontsize=font_size - 2)
        # The legend is shown only on the c == 30 figure (and always in front
        # mode); on the others the legend pandas created is removed.
        if c == 30 or front:
            ax1.legend(["Egress", "Replication", "Network"], fontsize=font_size - 2, ncol=1)
        else:
            ax1.get_legend().remove()
        ax1.grid(axis='y')

        # ---- PLOT 2: Traffic Breakdown ---- #
        if not front:
            df_traffic = df_c[['ingress_volume', 'egress_volume']]
            df_traffic.plot(kind='bar', stacked=True, ax=ax2, color=[
                ingress_traffic_color, egress_traffic_color])

            # Apply hatch patterns to the traffic bars, continuing after the
            # patterns already used by the cost components
            for bar, hatch in zip(ax2.containers, hatch_patterns[len(df_costs.columns):]):
                for patch in bar.patches:
                    patch.set_hatch(hatch)

            # Add total sum as a single number on top of each bar; totals
            # above 1024 are labelled in PB, the rest in TB.
            for rects in zip(*ax2.containers):
                total_height = sum(rect.get_height() for rect in rects)
                if total_height > 1024:
                    volume_label = f'{total_height / 1024:.1f}PB'
                else:
                    volume_label = f'{total_height:.0f}TB'
                ax2.text(rects[0].get_x() + rects[0].get_width() / 2, total_height,
                         volume_label, ha='center', va='bottom', fontsize=font_size - 6,
                         color='black')

            ax2.set_ylabel("Weekly Traffic", fontsize=font_size)
            ax2.set_xlabel(None)
            ax2.tick_params(rotation=0, labelsize=font_size - 2)
            ax2.set_xticklabels(df_traffic.index, fontsize=font_size - 5, rotation=15)
            if small_scale:
                traffic_ticks = [0, 2 * 1024, 4 * 1024, 6 * 1024, 8 * 1024]
                traffic_tick_labels = ["0", "2PB", "4PB", "6PB", "8PB"]
            else:
                traffic_ticks = [0, 30 * 1024, 60 * 1024, 90 * 1024, 120 * 1024]
                traffic_tick_labels = ["0", "30PB", "60PB", "90PB", "120PB"]

            ax2.set_yticks(traffic_ticks)
            ax2.set_yticklabels(traffic_tick_labels, fontsize=font_size - 2)
            if c == 30:
                ax2.legend(["Ingress Volume", "Egress Volume"], fontsize=font_size - 3, ncol=1)
            else:
                ax2.get_legend().remove()
            ax2.grid(axis='y')

        # Save the figure, then close it so figures do not accumulate across
        # loop iterations
        output_path = f'overall_comparison_c_{c}{suffix}.pdf'
        fig.savefig(output_path)
        plt.close(fig)
        print(f"Saved {output_path}")