def draw_traffic_rate()

in scripts/figs.py [0:0]


def draw_traffic_rate(single=True):
    """Plot weekly traffic rate and the derived network cost for Yugong vs. Moirai.

    Reads one ``log.csv`` per system, writes the combined per-week statistics
    to ``traffic_rate_stats.csv`` and saves one figure per percentile metric
    as ``traffic_rate_P90.pdf``, ``traffic_rate_P95.pdf`` and
    ``traffic_rate_P99.pdf``. All paths are relative to the current working
    directory.

    Args:
        single: If true, draw a single panel for ``cloud_computation_target``
            50; otherwise draw three side-by-side panels for targets 30, 50
            and 70.
    """
    # Local import: keeps this function self-contained with respect to the
    # file's import block (matplotlib itself is already used via ``plt``).
    from matplotlib.ticker import FuncFormatter

    tags = ('Yugong', 'Moirai')  # plotting order
    threshold = 160  # y position of the red "Network threshold" line
    ylim_by_metric = {'P90': 600, 'P95': 900, 'P99': 1500}

    def to_weekly_cost(rate):
        # NOTE(review): presumably $23.3 per hour per 100 Gbps, over a
        # 7 * 24 hour week -- confirm the pricing source.
        return rate / 100 * 7 * 24 * 23.3

    def traffic_rate_stats(df: pd.DataFrame, tag):
        """Return per-week traffic statistics of the 'size-predict' rows.

        Expected columns (from the log header): period, mode,
        cloud_computation_ratio, cloud_computation_target,
        ingress_byte_Presto, egress_byte_Presto, ingress_byte_Spark,
        egress_byte_Spark, P90_traffic_bps, P95_traffic_bps, P99_traffic_bps,
        movement_ingress_bytes, movement_egress_bytes, rep_bytes, sample_rate.

        The result has columns tag, week_id, cloud_computation_target,
        avg_traffic_bps, P90, P95, P99; the four rate columns are divided by
        1024 ** 3 (they are plotted with a "Gbps" axis label).
        """
        df = df[df['mode'] == 'size-predict'].copy()
        df['traffic_bytes'] = (df['ingress_byte_Presto'] + df['ingress_byte_Spark'] +
                               df['egress_byte_Presto'] + df['egress_byte_Spark'] +
                               df['movement_ingress_bytes'] + df['movement_egress_bytes'])
        # Bytes -> bits, averaged over the seconds of a 7-day period.
        # NOTE(review): assumes every period spans exactly one week -- confirm.
        df['avg_traffic_bps'] = df['traffic_bytes'] * 8 / 7 / 24 / 3600

        # The period starts with the start date as YYYYMMDD. Cast to str first
        # so a column that read_csv inferred as integer does not break `.str`.
        df['start_date'] = pd.to_datetime(df['period'].astype(str).str[:8],
                                          format='%Y%m%d')

        # Week index counted from 2024-10-22, which is week 1.
        reference_date = datetime(2024, 10, 22)
        df['week_id'] = ((df['start_date'] - reference_date).dt.days // 7 + 1).astype(int)

        df['tag'] = tag
        df = df.rename(columns={'P90_traffic_bps': 'P90',
                                'P95_traffic_bps': 'P95',
                                'P99_traffic_bps': 'P99'})
        for column in ('P90', 'P95', 'P99', 'avg_traffic_bps'):
            df[column] = df[column] / 1024 ** 3

        return df[['tag', 'week_id', 'cloud_computation_target', 'avg_traffic_bps',
                   'P90', 'P95', 'P99']]

    colors = {
        "Yugong": colors_default[0],
        "Moirai": colors_default[1]
    }

    markers = {
        'Yugong': 's',
        'Moirai': '*'
    }

    todo_dfs = []
    for rate in [0.002]:
        df = pd.read_csv(f'../sample_1.000_rep{rate:.3f}/log.csv')
        todo_dfs.append(traffic_rate_stats(df, "Moirai"))
    for rep_rate in [0.002]:
        df = pd.read_csv(f'../yugong_results_rep{rep_rate:.3f}/log.csv')
        todo_dfs.append(traffic_rate_stats(df, "Yugong"))

    # Concatenate all processed data
    # header: tag,week_id,cloud_computation_target,avg_traffic_bps,P90,P95,P99
    df = pd.concat(todo_dfs)
    df.to_csv('traffic_rate_stats.csv', index=False)

    c_list = [50] if single else [30, 50, 70]
    figsize = (10, 5) if single else (22, 7)

    for metric, ylim in ylim_by_metric.items():
        # squeeze=False always yields a 2-D array of axes, so the single- and
        # multi-panel layouts share one code path.
        fig, axes = plt.subplots(nrows=1, ncols=len(c_list), figsize=figsize,
                                 squeeze=False)

        for idx, (ax, c) in enumerate(zip(axes[0], c_list)):
            df_c = df[df['cloud_computation_target'] == c]
            # Filter and sort once per tag; reused for both y-axes below.
            per_tag = {tag: df_c[df_c['tag'] == tag].sort_values(by='week_id')
                       for tag in tags}

            for tag, sub_df in per_tag.items():
                # Plot the PXX traffic and the average traffic for comparison
                ax.plot(sub_df['week_id'], sub_df['avg_traffic_bps'], linestyle='--',
                        label=f'{tag} Avg', color=colors[tag],
                        marker=markers[tag], markersize=8)
                ax.plot(sub_df['week_id'], sub_df[metric], linestyle='-',
                        label=f'{tag} {metric}', color=colors[tag],
                        marker=markers[tag], markersize=8)

            # Set title and labels
            if not single:
                ax.set_title(f"On-prem:Cloud={100 - c}%:{c}%")
            ax.set_xlabel('Week')
            ax.axhline(y=threshold, color='red', linestyle='--', linewidth=2)
            if idx == 0:
                ax.set_ylabel('Traffic Rate (Gbps)')
                ax.legend(fontsize=font_size - 3, ncol=2)
                # Annotations are placed in data coordinates just below the line.
                ax.text(0.65, 120, f'{threshold}', color='red', ha='center', fontsize=20)
                ax.text(5, 110, 'Network threshold', color='red', ha='center', fontsize=20)
            else:
                # Only the left-most panel carries the traffic-rate tick labels.
                ax.tick_params(labelleft=False)

            ax.set_ylim(0, ylim)
            ax.grid(axis='y')

            # Secondary y-axis for cost. Its limits are the primary limits
            # pushed through the same conversion, so a cost curve coincides
            # with the corresponding traffic curve.
            ax2 = ax.twinx()
            ax2.set_ylim(0, to_weekly_cost(ylim))
            for tag, sub_df in per_tag.items():
                ax2.plot(sub_df['week_id'], to_weekly_cost(sub_df[metric]),
                         linestyle='-', label=f'{tag} Cost', color=colors[tag],
                         marker=markers[tag], markersize=8)

            if idx == len(c_list) - 1:
                # A formatter (rather than set_yticklabels on ticks read before
                # layout) keeps labels in sync with whatever ticks the locator
                # finally picks; ':g' avoids truncating e.g. 2500 to "2K".
                ax2.yaxis.set_major_formatter(
                    FuncFormatter(lambda value, _pos: f"{value / 1000:g}K"))
                ax2.set_ylabel('Weekly Network Cost ($)')
            else:
                # Only the right-most panel carries the cost tick labels.
                ax2.tick_params(labelright=False)

        # Save the figure
        fig.tight_layout()
        fig.savefig(f'traffic_rate_{metric}.pdf', bbox_inches='tight')
        plt.close(fig)
        print(f"Saved traffic_rate_{metric}.pdf")