def main(sc)

in spark_scripts/stat_for_ner_category_to_wh_words.py

Entry point: reads SQuAD-NER question records, computes per-NER-category wh-word statistics, and writes metric files plus a TOML ngram table to the output directory.

import argparse
import logging
import os
from contextlib import ExitStack

# RcQuestion and _run_stat come from elsewhere in this script (not shown here).


def main(sc):
    argp = argparse.ArgumentParser()
    argp.add_argument('--squadner', help='input path of squadner data', required=True)
    argp.add_argument('--output-dir', default='output/', help='directory for metric and TOML outputs')
    argp.add_argument('--num-partitions', type=int, default=1000, help='minimum number of partitions for the input RDD')
    argp.add_argument('--debug-save', help='for debugging purposes', action='store_true')
    args = argp.parse_args()

    os.makedirs(args.output_dir, exist_ok=True)

    # ExitStack ties all three output files to a single context, closing them together on exit.
    with ExitStack() as stack:
        metric_filename = os.path.join(args.output_dir, 'metric.txt')
        metric_fptr = stack.enter_context(open(metric_filename, 'w'))

        metric_per_category_fptr = stack.enter_context(open(
            os.path.join(args.output_dir, 'metric_per_category.txt'), 'w'))

        toml_fptr = stack.enter_context(open(
            os.path.join(args.output_dir, 'whxx_ngram_table.toml'), 'w'))

        # Each input line is a JSON-serialized question record.
        question_rdd = (sc.textFile(args.squadner, minPartitions=args.num_partitions)
                        .map(RcQuestion.deserialize_json))
        _run_stat(question_rdd, metric_fptr, metric_per_category_fptr, toml_fptr, args.output_dir)
        logging.info('Output dir: %s', args.output_dir)
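
For reference, a minimal sketch of a driver bootstrap that would invoke this entry point. The script's actual __main__ block is not shown above, so the SparkConf setup, app name, and launch paths below are assumptions:

# Hypothetical bootstrap; adjust the app name and launch command to the real deployment.
# Example launch (paths are illustrative):
#   spark-submit spark_scripts/stat_for_ner_category_to_wh_words.py \
#       --squadner data/squadner.jsonl --output-dir output/
from pyspark import SparkConf, SparkContext

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    conf = SparkConf().setAppName('stat_for_ner_category_to_wh_words')
    sc = SparkContext(conf=conf)
    try:
        main(sc)
    finally:
        sc.stop()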