prediction_generation/original-project/analysis/scripts/descriptive_annotations.py (52 lines of code) (raw):

# -*- coding: utf-8 -*-

"""Extract descriptive statistics for the time series

This script is used to extract descriptive statistics about the number of
annotations from the summary files.

Author: Gertjan van den Burg
Copyright (c) 2020 - The Alan Turing Institute
License: See the LICENSE file.

"""

import argparse
import json
import os
import statistics

# Maps each value accepted by ``--type`` to the callable that computes it.
_STATISTICS = {
    "min": min,
    "max": max,
    "mean": statistics.mean,
    "std": statistics.stdev,
}

# Statistics whose result is printed as an integer rather than a float.
_INTEGER_STATISTICS = ("min", "max")


def parse_args():
    """Parse the command line arguments.

    Returns
    -------
    argparse.Namespace
        Namespace with ``summary_dir`` (directory with the summary files) and
        ``type`` (one of "min", "max", "mean", "std").
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--summary-dir",
        help="Directory with summary files",
        required=True,
    )
    parser.add_argument(
        "-t",
        "--type",
        help="Type of statistic to compute",
        choices=list(_STATISTICS),
        required=True,
    )
    return parser.parse_args()


def load_unique_annotations(summary_dir):
    """Count the unique annotations in every summary file of a directory.

    Each regular file in ``summary_dir`` is parsed as JSON. The values of its
    "annotations" mapping are iterables of annotations; the union of all of
    them is taken and its size recorded.

    Parameters
    ----------
    summary_dir : str
        Directory containing the summary files.

    Returns
    -------
    list of int
        Number of unique annotations per summary file, ordered by file name.

    Raises
    ------
    KeyError
        If a summary file has no "annotations" key.
    json.JSONDecodeError
        If a file in the directory is not valid JSON.
    """
    n_uniq_anno = []
    for name in sorted(os.listdir(summary_dir)):
        path = os.path.join(summary_dir, name)
        # Sub-directories cannot be opened as summary files; skip them
        # instead of crashing with IsADirectoryError.
        if not os.path.isfile(path):
            continue
        # JSON is UTF-8 encoded; do not rely on the locale default.
        with open(path, "r", encoding="utf-8") as fp:
            data = json.load(fp)

        all_anno = set()
        for annotations in data["annotations"].values():
            all_anno.update(annotations)
        n_uniq_anno.append(len(all_anno))
    return n_uniq_anno


def main():
    """Compute the requested statistic and print it to standard output.

    Raises
    ------
    ValueError
        If the statistic type is unknown, if the summary directory contains
        no summary files, or if "std" is requested with fewer than two
        summary files.
    """
    args = parse_args()
    try:
        func = _STATISTICS[args.type]
    except KeyError:
        raise ValueError("Unknown type") from None

    n_uniq_anno = load_unique_annotations(args.summary_dir)
    if not n_uniq_anno:
        raise ValueError("No summary files found in: %s" % args.summary_dir)
    if args.type == "std" and len(n_uniq_anno) < 2:
        # statistics.stdev needs at least two data points.
        raise ValueError(
            "At least two summary files are needed to compute 'std'"
        )

    # NOTE(review): the trailing literal '%' is part of the output; presumably
    # it lets the value be included in a LaTeX document without a trailing
    # space -- confirm before changing.
    if args.type in _INTEGER_STATISTICS:
        print("%i%%" % func(n_uniq_anno))
    else:
        print("%.1f%%" % func(n_uniq_anno))


if __name__ == "__main__":
    main()