# generate_plots()
#
# in right_size_your_sagemaker_endpoints/load_test_helper.py [0:0]


def _plot_instance_panel(axis, frame, title):
    """Draw one annotated price-vs-throughput scatter panel on ``axis``."""
    axis.scatter(frame['Price per Hour'], frame['Max Requests per Second'])
    axis.title.set_text(title)
    axis.set_xlabel('Instance Price per Hour')
    axis.set_ylabel('Max. Requests per Second')
    # Label every point with its row label so instances can be told apart.
    for label, row in frame.iterrows():
        axis.annotate(label, (row['Price per Hour'], row['Max Requests per Second']))
    axis.grid(True)


def generate_plots(endpoints, endpoints_dict, results_folder, sep_cpu_gpu=False):
    """
    Generate plots comparing the performance vs price

    This function calls the get_pricing function to get instance prices,
    and plots them against the performance of each endpoint.

    Inputs:
    endpoints: list of endpoint names. Each name must contain at least one
        "-"; everything after the first "-" (with "-" replaced by ".") must
        equal "<instance_type>.x<instance_count>" of the matching entry in
        endpoints_dict, because that string is the key used to join
        performance with price.
    endpoints_dict: list of dicts, each with the keys 'instance_type' and
        'instance_count'
    results_folder: path where load test results are saved. For each
        endpoint the file
        "<results_folder>/test_<endpoint>/test_<endpoint>_stats_history.csv"
        is read and the 'Requests/s' value of its last row is used.
    sep_cpu_gpu: bool whether to plot CPU and GPU results in separate plots.
        defaults to False

    Output:
    Shows a Matplotlib plot of performance against price and returns a
    pandas DataFrame indexed by "<instance_type>.x<instance_count>" with the
    columns 'Price per Hour' and 'Max Requests per Second'.

    Raises:
    ValueError: if a priced instance has no usable 'Requests/s' value
        (for example because no endpoint name maps to its label).
    """
    # Hourly price of each configuration: unit price times instance count.
    prices = {}
    for item in endpoints_dict:
        instance = item['instance_type']
        count = item['instance_count']
        prices[f"{instance}.x{count}"] = get_pricing(instance) * count

    # Requests/s recorded in the last row of each endpoint's stats history.
    max_requests = {}
    for ep in endpoints:
        prefix = f"test_{ep}"
        df = pd.read_csv(f"{results_folder}/{prefix}/{prefix}_stats_history.csv")
        # Drop the part before the first "-" and turn the remaining dashes
        # into dots so the key lines up with the keys of `prices`.
        label = ep.split("-", 1)[1].replace("-", ".")
        max_requests[label] = df['Requests/s'].iloc[-1]

    # One row per label; labels present in only one dict get NaN in the other column.
    results = pd.DataFrame([prices, max_requests]).T
    results.columns = ['Price per Hour', 'Max Requests per Second']

    # math.floor cannot convert NaN, so report the offending labels explicitly
    # instead of failing with "cannot convert float NaN to integer".
    missing = results.index[results['Max Requests per Second'].isna()].tolist()
    if missing:
        raise ValueError(
            f"No 'Requests/s' value available for: {missing}. Check that every "
            "entry in endpoints_dict has a matching name in endpoints and that "
            "its stats history ends with a numeric 'Requests/s' value."
        )

    # Round down requests per second to integer
    results['Max Requests per Second'] = results['Max Requests per Second'].apply(math.floor)

    # get cpu-gpu flag from the second dot-separated token of the label
    # (assumes labels look like "ml.<family>.<size>.x<count>" - TODO confirm).
    # Families starting with p, g, e or i are put on the "GPU" side.
    results['gpu_flag'] = [
        1 if label.split('.')[1].startswith(('p', 'g', 'e', 'i')) else 0
        for label in results.index
    ]

    if sep_cpu_gpu:
        cpu_df = results[results['gpu_flag'] == 0]
        gpu_df = results[results['gpu_flag'] == 1]
        fig, ax = plt.subplots(1, 2, figsize=(15, 6))

        _plot_instance_panel(ax[0], cpu_df, 'CPU Instances')
        _plot_instance_panel(ax[1], gpu_df, 'GPU Instances')

        fig.suptitle("Pricing vs Performance Plot")
        plt.show()

    else:
        plt.figure(figsize=(12, 7))

        # One scatter call per row so each instance gets its own colour and
        # its own legend entry (legend labels follow the row order).
        for price, requests in zip(results['Price per Hour'],
                                   results['Max Requests per Second']):
            plt.scatter(price, requests)

        plt.title('Pricing vs Performance Plot', fontsize=15)
        plt.xlabel('Instance Price per Hour', fontsize=12)
        plt.ylabel('Max. Requests per Second', fontsize=12)
        plt.legend(results.index)
        plt.grid(True)
        plt.show()

    return results.drop(['gpu_flag'], axis=1)