in lib/analysis.py [0:0]
def calculate_histogram_tests_subsampling(control_data, branch_data, result):
    """Compare control and branch histograms with three two-sample tests.

    A sample is built with ``create_subsample`` from the ``"bins"`` and
    ``"counts"`` entries of ``control_data`` and of ``branch_data``. The two
    samples are then compared with an independent t-test, a Mann-Whitney U
    test and a two-sample Kolmogorov-Smirnov test.

    Each outcome is stored in ``result["tests"]`` under ``"ttest"``,
    ``"mwu"`` and ``"ks"`` respectively, as a dict with the keys
    ``"score"``, ``"p-value"`` and ``"effect"``. ``result`` is modified in
    place, and ``result["tests"]`` must already exist.
    """
    # Control is sampled before branch; the order is kept deliberately in
    # case create_subsample depends on shared state between calls.
    control_sample = create_subsample(
        control_data["bins"], control_data["counts"]
    )
    branch_sample = create_subsample(
        branch_data["bins"], branch_data["counts"]
    )

    # Summary statistics feeding the effect-size helpers.
    # NOTE(review): np.std is called with its default ddof -- confirm that
    # this is what calc_cohen_d expects.
    control_mean = np.mean(control_sample)
    control_std = np.std(control_sample)
    control_size = len(control_sample)
    branch_mean = np.mean(branch_sample)
    branch_std = np.std(branch_sample)
    branch_size = len(branch_sample)

    # t-test, with the effect size delegated to calc_cohen_d.
    cohen_d = calc_cohen_d(
        control_mean,
        branch_mean,
        control_std,
        branch_std,
        control_size,
        branch_size,
    )
    t_score, t_pvalue = stats.ttest_ind(control_sample, branch_sample)
    result["tests"]["ttest"] = {
        "score": t_score,
        "p-value": t_pvalue,
        "effect": cohen_d,
    }

    # Mann-Whitney U test, with the effect size delegated to
    # rank_biserial_correlation.
    u_score, u_pvalue = stats.mannwhitneyu(control_sample, branch_sample)
    rank_biserial = rank_biserial_correlation(
        control_size, branch_size, u_score
    )
    result["tests"]["mwu"] = {
        "score": u_score,
        "p-value": u_pvalue,
        "effect": rank_biserial,
    }

    # Kolmogorov-Smirnov test: the D statistic is stored both as the score
    # and as the effect.
    ks_score, ks_pvalue = stats.ks_2samp(control_sample, branch_sample)
    result["tests"]["ks"] = {
        "score": ks_score,
        "p-value": ks_pvalue,
        "effect": ks_score,
    }