def performance_diff()

in charts/builder.py [0:0]


def performance_diff(title, ylabel, xlabel, ax, operations, box_colors, data, value_column='value.90_0'):
  """
  Compare Elasticsearch and OpenSearch on one metric, print a summary and plot the means.

  The rows of ``data`` are restricted to the requested operations and split by the
  'user-tags.product' column into an Elasticsearch sample (es) and an OpenSearch
  sample (va). A t-test is run on ``value_column`` of the two samples, the symmetric
  percentage difference between the two sample means is computed (absolute difference
  divided by the average of the two means), and two summary lines are printed.
  Finally the per-product mean of ``value_column`` is drawn as a bar chart on ``ax``.

  Parameters:
      title (str): Title for the plot; also used as the heading of the printed summary.
      ylabel (str): Label for the y-axis of the plot.
      xlabel (str): Label for the x-axis of the plot.
      ax (matplotlib.axes._subplots.AxesSubplot): Matplotlib AxesSubplot object to plot the bar chart.
      operations (list): List of operations to include in the analysis.
      box_colors (list): List of colors for the bars in the bar chart.
      data (pandas.DataFrame): DataFrame containing performance data. Must provide the
                               'operation' and 'user-tags.product' columns as well as
                               ``value_column``.
      value_column (str, optional): The column in the DataFrame containing the performance values.
                                    Default is 'value.90_0'.

  Returns:
      None. The results are printed to stdout and drawn on ``ax``.

  Notes:
      The printed summary always reads "90th percentile latency", "ms" and "p<0.01",
      whatever ``value_column`` is chosen and whatever p-value the test yields; the
      actual p-value is printed right after it.
  """

  # Keep only the rows that belong to the requested operations.
  data = data[data['operation'].isin(operations)]

  # One sample per product.
  es = data[data['user-tags.product']=='Elasticsearch']
  va = data[data['user-tags.product']=='OpenSearch']

  # ttest_ind is expected to be imported at file level
  # (presumably scipy.stats.ttest_ind -- confirm against the file's imports).
  ttest = ttest_ind(es[value_column], va[value_column])

  es_mean = es[value_column].mean()
  va_mean = va[value_column].mean()

  # Symmetric percentage difference: |va - es| relative to the average of both means,
  # so the result does not depend on which product is taken as the baseline.
  performance_difference_pct = 100 * (abs(va_mean - es_mean)/ ((es_mean+va_mean)/2))

  print(f"===== {title} - {performance_difference_pct:.1f}% ")
  # NOTE(review): the sample count multiplies the row count by 100 -- presumably each
  # row aggregates 100 requests; confirm against the data source.
  print(f"===== (90th percentile latency of {es_mean:.0f}ms vs. {va_mean:.0f}ms, p<0.01) p={ttest.pvalue:.5f} | Samples: {len(data.index)*100} requests")

  # groupby sorts its keys, so the bars (and the colors from box_colors) follow the
  # alphabetical order of the product names.
  grouped_data = data.groupby('user-tags.product')[value_column].mean().reset_index()

  ax.bar(grouped_data['user-tags.product'], grouped_data[value_column], color=box_colors)

  ax.set_title(title)
  # The x-axis label was accepted and documented but never applied before.
  ax.set_xlabel(xlabel)
  ax.set_ylabel(ylabel)
  ax.tick_params(axis='both', labelsize=8)