in right_size_your_sagemaker_endpoints/load_test_helper.py [0:0]
def generate_plots(endpoints, endpoints_dict, results_folder, sep_cpu_gpu=False):
    """
    Generate plots comparing the performance vs price

    This function calls the get_pricing function to get instance prices,
    and plots them against the performance of each endpoint.

    Inputs:
    endpoints: list of endpoint names. The part of each name after the
               first "-" (with remaining "-" replaced by ".") must equal
               "<instance_type>.x<instance_count>" of the matching
               endpoints_dict entry so prices and results line up.
    endpoints_dict: list of dicts, each with the keys 'instance_type' and
                    'instance_count'
    results_folder: path where load test results are saved; the file
                    "<results_folder>/test_<ep>/test_<ep>_stats_history.csv"
                    is read for every endpoint
    sep_cpu_gpu: bool whether to plot CPU and GPU results in separate plots.
                 defaults to False

    Output:
    Displays a Matplotlib plot showing performance against price and
    returns a pandas DataFrame indexed by "<instance_type>.x<instance_count>"
    with the columns 'Price per Hour' and 'Max Requests per Second'.
    """
    price_col = 'Price per Hour'
    rps_col = 'Max Requests per Second'
    # Instance families whose name starts with one of these letters are
    # flagged as GPU; everything else is plotted as CPU.
    gpu_prefixes = ('p', 'g', 'e', 'i')

    # Price of each configuration: single-instance price times instance count
    prices = {}
    for item in endpoints_dict:
        instance = item['instance_type']
        count = item['instance_count']
        prices[f"{instance}.x{count}"] = get_pricing(instance) * count

    # Get max requests for all instance types
    max_requests = {}
    for ep in endpoints:
        prefix = f"test_{ep}"
        history = pd.read_csv(f"{results_folder}/{prefix}/{prefix}_stats_history.csv")
        # The last recorded row of the stats history is taken as the maximum
        last_rps = history.tail(1)['Requests/s'].values[0]
        max_requests[ep.split("-", 1)[1].replace("-", ".")] = last_rps

    results = pd.DataFrame([prices, max_requests]).T
    results.columns = [price_col, rps_col]

    # Round down requests per second to integer.
    # NOTE: math.floor raises ValueError on NaN, which appears when a key is
    # present in only one of prices / max_requests.
    results[rps_col] = results[rps_col].apply(math.floor)

    # get cpu-gpu flag from the second dot-separated part of the index label
    # (e.g. "c5" in "ml.c5.xlarge.x1")
    results['gpu_flag'] = [
        1 if label.split('.')[1].startswith(gpu_prefixes) else 0
        for label in results.index
    ]

    if sep_cpu_gpu:
        fig, ax = plt.subplots(1, 2, figsize=(15, 6))
        _plot_instance_group(ax[0], results[results['gpu_flag'] == 0], 'CPU Instances')
        _plot_instance_group(ax[1], results[results['gpu_flag'] == 1], 'GPU Instances')
        fig.suptitle("Pricing vs Performance Plot")
        plt.show()
    else:
        plt.figure(figsize=(12, 7))
        # One scatter call per configuration so that each point gets its own
        # colour and its own legend entry.
        for price, rps in zip(results[price_col], results[rps_col]):
            plt.scatter(price, rps)
        plt.title('Pricing vs Performance Plot', fontsize=15)
        plt.xlabel('Instance Price per Hour', fontsize=12)
        plt.ylabel('Max. Requests per Second', fontsize=12)
        plt.legend(results.index)
        plt.grid(True)
        plt.show()

    return results.drop(['gpu_flag'], axis=1)


def _plot_instance_group(axis, frame, title):
    """
    Draw one annotated price-vs-throughput scatter plot on the given axes.

    Inputs:
    axis: Matplotlib axes to draw on
    frame: DataFrame with the columns 'Price per Hour' and
           'Max Requests per Second'; the index labels annotate the points
    title: title of the subplot
    """
    axis.scatter(frame['Price per Hour'], frame['Max Requests per Second'])
    axis.title.set_text(title)
    axis.set_xlabel('Instance Price per Hour')
    axis.set_ylabel('Max. Requests per Second')
    for label, row in frame.iterrows():
        axis.annotate(label, (row['Price per Hour'], row['Max Requests per Second']))
    axis.grid(True)