def draw_job_routing()

in scripts/figs.py [0:0]


def draw_job_routing():
    """Plot Moirai-vs-Yugong job-routing box plots and save them as PDFs.

    Loads ``../sample_1.000_rep0.002/log.csv`` (tagged "Moirai") and
    ``../yugong_results_rep0.002/log.csv`` (tagged "Yugong"), then writes two
    figures into the current working directory:

    * ``routing_traffic.pdf`` -- traffic volume, with the 11.5PB threshold line.
    * ``routing_cost.pdf`` -- egress cost.

    Each figure has one panel per cloud computation target (30, 50, 70) and
    five boxes per panel: Yugong followed by the four Moirai routing modes.

    Uses ``pd``, ``np``, ``plt`` and ``font_size`` from the enclosing module.
    """
    targets = (30, 50, 70)
    moirai_modes = ('independent', 'size-unaware', 'size-predict', 'size-aware')
    positions = [1, 2.2, 2.5, 2.9, 3.3]
    # NOTE(review): only four colors are listed for five boxes, so zip() below
    # stops early and the last box ('size-aware') keeps matplotlib's default
    # face color -- confirm whether that is intended.
    colors = ['darkorange', 'blue', 'dodgerblue', 'cyan']
    medianprops = dict(linestyle='-', linewidth=2, color='gold')

    def process(df: pd.DataFrame, tag: str):
        """Return a new frame with the derived metrics, labelled with ``tag``.

        The input frame is left unmodified.
        """
        # header: period,mode,cloud_computation_ratio,cloud_computation_target,
        # ingress_byte_Presto,egress_byte_Presto,ingress_byte_Spark,egress_byte_Spark,
        # P90_traffic_bps,P95_traffic_bps,P99_traffic_bps,
        # movement_ingress_bytes,movement_egress_bytes,rep_bytes,sample_rate
        egress_volume = (df['egress_byte_Presto'] + df['egress_byte_Spark'] +
                         df['movement_egress_bytes'])
        out = df[['cloud_computation_target', 'mode']].copy()
        out['traffic_volume'] = (df['ingress_byte_Presto'] + df['ingress_byte_Spark'] +
                                 egress_volume + df['movement_ingress_bytes'])
        # Egress is priced at 0.02 per 1024**3 bytes.
        out['egress_cost'] = egress_volume / 1024 ** 3 * 0.02
        out['tag'] = tag
        return out[['tag', 'cloud_computation_target', 'traffic_volume', 'egress_cost', 'mode']]

    # header: mode,cloud_computation_target,traffic_volume,egress_cost
    df_moirai = process(pd.read_csv('../sample_1.000_rep0.002/log.csv'), "Moirai")
    df_yugong = process(pd.read_csv('../yugong_results_rep0.002/log.csv'), "Yugong")

    def draw_figure(column, yticks, yticklabels, ylabel, unaware_anchor, out_file,
                    show_threshold=False):
        """Draw one three-panel box-plot figure for ``column`` and save it.

        ``unaware_anchor`` is ``(percentile, scale)``: the 'Size Unaware' label
        is placed at ``scale`` times that percentile of its box's data.
        ``show_threshold`` adds the 11.5PB network-threshold line to every
        panel and its annotations to the first panel.
        """
        unaware_pct, unaware_scale = unaware_anchor
        fig, axes = plt.subplots(1, 3, figsize=(12, 5.5), sharey=True)
        for ax, c in zip(axes, targets):
            yugong_c = df_yugong[df_yugong['cloud_computation_target'] == c]
            moirai_c = df_moirai[df_moirai['cloud_computation_target'] == c]

            # Box order: Yugong first, then one box per Moirai mode.
            box_data = [yugong_c[column]]
            box_data.extend(moirai_c[moirai_c['mode'] == mode][column]
                            for mode in moirai_modes)

            bp = ax.boxplot(box_data, patch_artist=True, positions=positions, widths=0.2,
                            showfliers=False, showmeans=True, whis=[10, 90],
                            medianprops=medianprops)

            # Set boxplot colors
            for patch, color in zip(bp['boxes'], colors):
                patch.set_facecolor(color)

            # Set title and labels
            ax.set_title(f"On-prem:Cloud\n{100 - c}%:{c}%", fontsize=font_size - 2)
            ax.set_xlabel(None)
            ax.set_yscale('log', base=2)
            # One tick under the Yugong box, one centred under the Moirai group.
            ax.set_xticks([1, 2.7])
            ax.set_xticklabels(['Yugong', 'Moirai'], fontsize=font_size - 2)
            ax.set_yticks(yticks)
            ax.set_yticklabels(yticklabels, fontsize=font_size - 2)
            if show_threshold:
                ax.axhline(y=11.5 * 1024**5, color='red', linestyle='--', linewidth=1.4)
            # Only the left-most panel carries the y label (and threshold text).
            if c == targets[0]:
                if show_threshold:
                    ax.text(0.06, 10.5 * 1024**5, '11.5PB', ha='center',
                            fontsize=font_size - 2, color='red', rotation=0)
                    ax.text(2.5, 13 * 1024**5, 'Network threshold', ha='center',
                            fontsize=font_size - 6, color='red', rotation=0)
                ax.set_ylabel(ylabel)

            ax.tick_params(axis='x', labelsize=font_size - 2)
            ax.grid(axis='y')

            # Per-box mode labels, anchored relative to each box's own data.
            ax.text(2.2, np.percentile(box_data[1], 90) * 1.07, 'Indep', ha='center',
                    fontsize=font_size - 6, color='black')
            ax.text(1.9, np.percentile(box_data[2], unaware_pct) * unaware_scale,
                    'Size\nUnaware', ha='center', fontsize=font_size - 6)
            ax.text(2.95, np.percentile(box_data[3], 90) * 1.1, 'Size\nPredict', ha='center',
                    fontsize=font_size - 6, color='black')
            ax.text(3.3, np.percentile(box_data[4], 40) * 0.3, 'Size\nOracular', ha='center',
                    fontsize=font_size - 6, color='black')
        fig.tight_layout()
        fig.savefig(out_file)
        plt.close(fig)

    # fig(a): weekly traffic volume
    # Ticks sit at 2**40 * 2**i bytes; labelled in TB below 2**10, otherwise in PB.
    exponents = [6, 7, 8, 9, 10, 13, 15, 16, 17]
    draw_figure(
        column='traffic_volume',
        yticks=[2 ** 40 * 2 ** i for i in exponents],
        yticklabels=[f"{2 ** i:.0f}TB" if i < 10 else f"{2 ** i / 1024:.0f}PB"
                     for i in exponents],
        ylabel="Weekly Traffic Volume (log)",
        unaware_anchor=(90, 0.6),
        out_file='routing_traffic.pdf',
        show_threshold=True,
    )

    # fig(b): weekly egress cost
    draw_figure(
        column='egress_cost',
        yticks=[200, 500, 2000, 5000, 50000, 500000, 5000000],
        yticklabels=["$200", "$500", "$2K", "$5K", "$50K", "$500K", "$5M"],
        ylabel="Weekly Egress Cost (log)",
        unaware_anchor=(40, 1.0),
        out_file='routing_cost.pdf',
    )