def calculate_histogram_tests_subsampling()

in lib/analysis.py [0:0]


def calculate_histogram_tests_subsampling(control_data, branch_data, result):
  """Run three two-sample tests on subsampled histograms and record them.

  A sample is built for each side by passing its "bins" and "counts" to
  create_subsample. The two samples are then compared with
  scipy's independent t-test, Mann-Whitney U test and two-sample
  Kolmogorov-Smirnov test.

  Args:
    control_data: Mapping providing "bins" and "counts" for the control side.
    branch_data: Mapping providing "bins" and "counts" for the branch side.
    result: Mapping whose existing "tests" entry is updated in place with
      the keys "ttest", "mwu" and "ks". Each of those holds a dict with
      "score", "p-value" and "effect".

  Returns:
    None. All output is written into result["tests"].
  """
  control_sample = create_subsample(control_data["bins"], control_data["counts"])
  branch_sample = create_subsample(branch_data["bins"], branch_data["counts"])

  # Per-sample summary statistics; these feed calc_cohen_d, and the sample
  # sizes are reused below by rank_biserial_correlation.
  control_mean = np.mean(control_sample)
  control_std = np.std(control_sample)
  control_size = len(control_sample)
  branch_mean = np.mean(branch_sample)
  branch_std = np.std(branch_sample)
  branch_size = len(branch_sample)

  # t-test: effect size comes from calc_cohen_d.
  ttest_effect = calc_cohen_d(
    control_mean, branch_mean,
    control_std, branch_std,
    control_size, branch_size,
  )
  t_score, t_pvalue = stats.ttest_ind(control_sample, branch_sample)
  result["tests"]["ttest"] = {
    "score": t_score,
    "p-value": t_pvalue,
    "effect": ttest_effect,
  }

  # Mann-Whitney U test: effect size comes from rank_biserial_correlation.
  u_score, u_pvalue = stats.mannwhitneyu(control_sample, branch_sample)
  mwu_effect = rank_biserial_correlation(control_size, branch_size, u_score)
  result["tests"]["mwu"] = {
    "score": u_score,
    "p-value": u_pvalue,
    "effect": mwu_effect,
  }

  # Kolmogorov-Smirnov test: the D statistic is stored as both the score
  # and the effect.
  ks_score, ks_pvalue = stats.ks_2samp(control_sample, branch_sample)
  result["tests"]["ks"] = {
    "score": ks_score,
    "p-value": ks_pvalue,
    "effect": ks_score,
  }