prediction_generation/original-project/analysis/scripts/significance.py (129 lines of code) (raw):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Code to compute significant differences
Author: Gertjan van den Burg
Copyright (c) 2020 - The Alan Turing Institute
License: See the LICENSE file.
"""
import argparse
import math
import scipy.stats as stats
from tabulate import tabulate
from rank_common import (
load_data,
preprocess_data,
compute_ranks,
)
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("-o", "--output", help="Output basename")
parser.add_argument(
"-i", "--input", help="Input JSON file with results for each method"
)
parser.add_argument(
"-m",
"--mode",
choices=["global", "reference"],
help="Whether to do a global difference F test or a reference test to the best performing method",
)
parser.add_argument(
"--type",
help="Type of table to make",
choices=["best", "default"],
required=True,
)
return parser.parse_args()
def global_difference(avg_ranks, n_datasets):
N = n_datasets
k = len(avg_ranks)
avg_sq_sum = sum([pow(float(avg_ranks[m]), 2.0) for m in avg_ranks])
chi2 = (
12.0 * N / (k * (k + 1)) * (avg_sq_sum - (k * pow(k + 1, 2.0) / 4.0))
)
chiprob = 1.0 - stats.chi2.cdf(chi2, k - 1)
Fstat = (N - 1.0) * chi2 / (N * (k - 1) - chi2)
Fprob = 1.0 - stats.f.cdf(Fstat, k - 1, (k - 1) * (N - 1))
return Fstat, Fprob
def argmin(func, args):
m, inc = float("inf"), None
for a in args:
v = func(a)
if v < m:
m, inc = v, a
return inc
def reference_difference(avg_ranks, n_datasets, significance_level=0.05):
N = n_datasets
k = len(avg_ranks)
methods = sorted(avg_ranks.keys())
ranks = [avg_ranks[m] for m in methods]
ref_method = argmin(lambda m: avg_ranks[m], methods)
ref_idx = methods.index(ref_method)
others = [m for m in methods if not m == ref_method]
Z_scores = [0.0] * (k - 1)
P_values = [0.0] * (k - 1)
constant = math.sqrt(6 * N / (k * (k + 1)))
for j, method in enumerate(others):
i = methods.index(method)
Z_scores[j] = (ranks[ref_idx] - ranks[i]) * constant
P_values[j] = stats.norm.cdf(Z_scores[j])
# sort the p-values in ascending order
sorted_pvals = sorted((p, i) for i, p in enumerate(P_values))
# Calculate significance differences following Holm's procedure
significant_differences = [False] * (k - 1)
thresholds = [0] * (k - 1)
CD_threshold = None
for i in range(k - 1):
threshold = significance_level / float(k - (i + 1))
pval, idx = sorted_pvals[i]
significant_differences[idx] = pval < threshold
thresholds[idx] = threshold
if pval > threshold and CD_threshold is None:
CD_threshold = threshold
# Calculate the critical difference from the first threshold that failed to
# reject. This works because if the p-value would be below the threshold we
# would consider it significantly different and above the threshold we
# would not.
CD = -1 * stats.norm.ppf(CD_threshold) / constant
txt = [
"Number of datasets: %i" % N,
"Number of methods: %i" % k,
"Reference method: %s" % ref_method,
"Significance level: %g" % significance_level,
"",
"Reference method rank: %.6f" % avg_ranks[ref_method],
"Holm's procedure:",
]
table = []
for o, p, t, s in zip(
others, P_values, thresholds, significant_differences
):
table.append([o, avg_ranks[o], p, t, s])
txt.append(
tabulate(
table,
headers=["Method", "Rank", "p-Value", "Threshold", "Significant"],
)
)
txt.append("")
txt.append(
"Critical difference: %.6f (at threshold = %.6f)" % (CD, CD_threshold)
)
txt.append("")
return ref_method, CD, txt
def main():
args = parse_args()
data = load_data(args.input)
clean, methods = preprocess_data(data, args.type)
n_datasets = len(clean)
avg_ranks, all_ranks = compute_ranks(
clean, keep_methods=methods, higher_better=True
)
if args.mode == "global":
Fstat, Fprob = global_difference(avg_ranks, n_datasets)
if args.output:
with open(args.output, "w") as fp:
fp.write("F = %.1f (p = %g)" % (Fstat, Fprob))
else:
print("Fstat = %.2f, Fprob = %.2g" % (Fstat, Fprob))
elif args.mode == "reference":
ref_method, CD, txt = reference_difference(avg_ranks, n_datasets)
if args.output:
outRef = args.output + "_ref.tex"
with open(outRef + "w") as fp:
fp.write(outRef + "%")
outCD = args.output + "_CD.tex"
with open(outCD + "w") as fp:
fp.write(outCD + "%")
else:
print("Reference method = %s, CD = %.2f" % (ref_method, CD))
print("")
print("\n".join(txt))
if __name__ == "__main__":
main()