charts/charts.py (347 lines of code) (raw):

# Plot benchmark latency charts from a CSV file.
#
# The CSV path is taken from the first command line argument. Rows are
# filtered and numeric columns normalised on a best-effort basis, then five
# figures are drawn. Each figure has one column per operation group: the top
# row is drawn by builder.performance_diff on 'value.90_0' and the bottom row
# by builder.boxplot on 'value.mean'.
import sys

import matplotlib.pyplot as plt
import pandas as pd

import builder

# Colour pair handed to every builder call (a fresh list is passed each time).
BOX_COLORS = ('#005571AA', '#FF7F00AA')

# Columns that may arrive as strings with thousands separators ("1,234.5").
NUMERIC_COLUMNS = ('value.90_0', 'value.100_0', 'value.mean', 'value')

# One entry per figure; each figure is a sequence of (title, operations)
# panels laid out left to right.
FIGURES = (
    (
        ('Timestamp Sorting', [
            'asc_sort_timestamp',
            'asc_sort_timestamp_can_match_shortcut',
            'asc_sort_timestamp_no_can_match_shortcut',
            'asc_sort_with_after_timestamp',
            'desc_sort_timestamp',
            'desc_sort_timestamp_can_match_shortcut',
            'desc_sort_timestamp_no_can_match_shortcut',
            'desc_sort_with_after_timestamp',
        ]),
        ('Keyword Sorting', [
            'sort_keyword_can_match_shortcut',
            'sort_keyword_no_can_match_shortcut',
        ]),
        ('Numeric Sorting', [
            'sort_numeric_desc',
            'sort_numeric_asc',
            'sort_numeric_desc_with_match',
            'sort_numeric_asc_with_match',
        ]),
    ),
    (
        ('Date Histogram', [
            'date_histogram_minute_agg',
            'date_histogram_hourly_agg',
        ]),
        ('Date Histogram Composite', [
            'composite-date_histogram-daily',
            'composite-date_histogram-monthly',
            'composite-date_histogram-weekly',
        ]),
    ),
    (
        ('Range query', [
            'range',
            'range-numeric',
            'keyword-in-range',
            'range_field_conjunction_big_range_big_term_query',
            'range_field_disjunction_big_range_small_term_query',
            'range-auto-date-histo-with-metrics',
            'range_field_conjunction_small_range_big_term_query',
            'range_field_conjunction_small_range_small_term_query',
        ]),
        ('Range Aggregation', [
            'range-auto-date-histo',
        ]),
    ),
    (
        ('Terms Query', [
            'term',
        ]),
        ('Terms Aggregation', [
            'multi_terms-keyword',
            'keyword-terms-low-cardinality',
            'keyword-terms',
        ]),
        ('Terms Agg. (Composite)', [
            'composite-terms',
            'composite-terms-keyword',
        ]),
    ),
    (
        ('Simple Query', [
            'query-string-on-message',
            'query-string-on-message-filtered',
        ]),
        ('Simple Query Sorted Numeric', [
            'query-string-on-message-filtered-sorted-num',
        ]),
    ),
)


def _coerce_to_float(frame, column):
    """Strip ',' from a string column and convert it to float, in place.

    Best effort: the frame is left untouched when the column is missing
    (KeyError), is not string-typed (AttributeError from ``.str``) or holds
    text that does not parse as a number (ValueError/TypeError).
    """
    try:
        frame[column] = frame[column].str.replace(',', '').astype(float)
    except (KeyError, AttributeError, ValueError, TypeError):
        pass


def _draw_figure(frame, panels):
    """Create one figure with a p90 row above a mean-latency boxplot row.

    ``panels`` is a sequence of ``(title, operations)`` pairs, one per
    column. Only the left-most column gets y-axis labels.
    """
    # squeeze=False keeps ``axes`` two-dimensional for any column count.
    _, axes = plt.subplots(2, len(panels), squeeze=False)
    for column, (title, operations) in enumerate(panels):
        leftmost = column == 0
        builder.performance_diff(
            title=title,
            ylabel='Latency p90' if leftmost else '',
            xlabel='',
            data=frame,
            value_column='value.90_0',
            ax=axes[0][column],
            operations=operations,
            box_colors=list(BOX_COLORS),
        )
        builder.boxplot(
            title='',
            ylabel='Latency Distribution' if leftmost else '',
            xlabel='',
            data=frame,
            value_column='value.mean',
            ax=axes[1][column],
            operations=operations,
            box_colors=list(BOX_COLORS),
        )


if len(sys.argv) < 2:
    print("Please provide the path to the CSV file as a command line argument.")
    sys.exit(1)

csv_file = sys.argv[1]
data = pd.read_csv(csv_file)

# Keep only 'service_time' rows whose mean and product tag are not the '-'
# placeholder. The three filters share one try on purpose: a missing column
# aborts the remaining filters but keeps the ones already applied.
# NOTE(review): 'user-tags.product' is filtered here before it is (re)filled
# from 'meta.tag_product' below - confirm that ordering is intended.
try:
    data = data[data['name'] == 'service_time']
    data = data[data['value.mean'] != '-']
    data = data[data['user-tags.product'] != '-']
except KeyError:
    pass

# Work on an independent frame so the column assignments below do not write
# into a filtered slice of the frame returned by read_csv.
data = data.copy()

# When present, 'meta.tag_product' replaces 'user-tags.product'.
try:
    data['user-tags.product'] = data['meta.tag_product']
except KeyError:
    pass

for numeric_column in NUMERIC_COLUMNS:
    _coerce_to_float(data, numeric_column)

print(data.head())

for figure_panels in FIGURES:
    _draw_figure(data, figure_panels)

plt.show()