in archived/inference-recommender-with-python-sdk/cloudwatch.py [0:0]
# One row per subplot, in row-major order of the 3x3 grid:
# (CloudWatch metric name, statistic key, read via the "extended" extractor?,
#  fixed y-axis label or None to use the unit reported by the datapoints).
_ENDPOINT_METRIC_PLOTS = (
    ('Invocations', 'Sum', False, 'No of Invocations'),
    ('ModelLatency', 'p99', True, None),
    ('OverheadLatency', 'p99', True, None),
    ('CPUUtilization', 'Maximum', False, None),
    ('MemoryUtilization', 'Maximum', False, None),
    ('DiskUtilization', 'Maximum', False, None),
    ('Invocation4XXErrors', 'Sum', False, None),
    ('Invocation5XXErrors', 'Sum', False, None),
    ('InvocationsPerInstance', 'Sum', False, None),
)

# Grid dimensions of each per-endpoint figure; rows * cols must cover every
# entry of _ENDPOINT_METRIC_PLOTS.
_PLOT_GRID_ROWS = 3
_PLOT_GRID_COLS = 3


def _plot_endpoint_metric(ax, cw_client, record, metric_name, statistic, extended, ylabel):
    """Fetch one CloudWatch metric for ``record`` and draw it on ``ax``.

    Args:
        ax: Matplotlib axes that receives the title, y-label and line plot.
        cw_client: Client object handed through to ``get_cw_metrics``.
        record: Mapping with the keys ``EndpointName``, ``VariantName``,
            ``StartTime`` and ``EndTime``.
        metric_name: Metric name passed to ``get_cw_metrics``; also used as
            the subplot title.
        statistic: Key passed to the y-value extractor (e.g. ``'Sum'``,
            ``'Maximum'``, ``'p99'``).
        extended: When true, y-values are read with
            ``get_y_from_extended_datapoints`` instead of
            ``get_y_from_datapoints``.
        ylabel: Fixed y-axis label, or ``None`` to label the axis with
            ``get_unit_from_datapoints``.
    """
    response = get_cw_metrics(cw_client, record['EndpointName'], record['VariantName'], metric_name,
                              record['StartTime'], record['EndTime'])
    datapoints = sort_cw_datapoints_by_timestamp(response['Datapoints'])

    x_values = get_x_from_datapoints(datapoints)
    if extended:
        y_values = get_y_from_extended_datapoints(datapoints, statistic)
    else:
        y_values = get_y_from_datapoints(datapoints, statistic)

    ax.set_title(metric_name)
    ax.set_ylabel(ylabel if ylabel is not None else get_unit_from_datapoints(datapoints))
    ax.plot(x_values, y_values)


def get_endpoint_metrics(sm_client, cw_client, region, job_name, include_plots=False):
    """Return the results of ``job_name`` as a DataFrame, optionally plotting endpoint metrics.

    The DataFrame comes from ``get_job_results_as_dataframe``. When
    ``include_plots`` is true, one figure is built per DataFrame row, holding a
    3x3 grid with one subplot for each entry of ``_ENDPOINT_METRIC_PLOTS``, and
    all figures are shown with a single ``plt.show()`` call at the end.

    Side effect: several pandas display options (max rows/columns, width,
    header justification, precision) are changed globally via
    ``pd.set_option`` and are not restored.

    Args:
        sm_client: Client object handed through to
            ``get_job_results_as_dataframe``.
        cw_client: Client object handed through to ``get_cw_metrics``. Only
            used when ``include_plots`` is true.
        region: Unused by this function; kept so existing callers keep working.
        job_name: Job identifier handed through to
            ``get_job_results_as_dataframe``.
        include_plots: Whether to fetch CloudWatch metrics and plot them.

    Returns:
        The DataFrame returned by ``get_job_results_as_dataframe``.
    """
    df = get_job_results_as_dataframe(sm_client, job_name)

    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 1000)
    pd.set_option('display.colheader_justify', 'center')
    pd.set_option('display.precision', 3)

    # The CloudWatch series are consumed only by the plotting code, so nothing
    # is fetched when plots are disabled (previously nine requests per record
    # were issued and their results discarded).
    # NOTE(review): assumes get_cw_metrics has no side effects callers rely on - confirm.
    if not include_plots:
        return df

    for record in df.to_dict('records'):
        fig = plt.figure(figsize=(20, 16), constrained_layout=True)
        fig.suptitle(f"Instance type {record['InstanceType']} Endpoint {record['EndpointName']}",
                     fontsize=16)
        spec = gridspec.GridSpec(ncols=_PLOT_GRID_COLS, nrows=_PLOT_GRID_ROWS, figure=fig)

        # Create every subplot up front, row by row, so the axes order matches
        # the order of _ENDPOINT_METRIC_PLOTS.
        axes = [
            fig.add_subplot(spec[row, col])
            for row in range(_PLOT_GRID_ROWS)
            for col in range(_PLOT_GRID_COLS)
        ]

        for ax, (metric_name, statistic, extended, ylabel) in zip(axes, _ENDPOINT_METRIC_PLOTS):
            _plot_endpoint_metric(ax, cw_client, record, metric_name, statistic, extended, ylabel)

    # Render once, after the figure of every record has been built.
    plt.show()
    return df