in src/mlm/cmds.py [0:0]
def _add_capitalize_flags(subparser) -> None:
    """Add the tri-state --capitalize / --no-capitalize flag pair.

    The resulting `capitalize` attribute is True, False, or None
    (None = neither flag given, i.e. leave casing unchanged).
    """
    group = subparser.add_mutually_exclusive_group(required=False)
    group.add_argument('--capitalize', dest='capitalize', action='store_true')
    group.add_argument('--no-capitalize', dest='capitalize', action='store_false')
    subparser.set_defaults(capitalize=None)


def _add_text_args(subparser) -> None:
    """Add text-preprocessing arguments shared by 'score', 'bin', and 'finetune'."""
    subparser.add_argument('--eos', action='store_true',
        help="append '.' (this can help mitigate train-test disparity)")
    _add_capitalize_flags(subparser)
    subparser.add_argument('--whole-word-mask', action='store_true',
        help="mask whole words")


def _add_scoring_args(subparser, split_size: int) -> None:
    """Add the arguments shared by the 'score' and 'bin' subcommands.

    Args:
        subparser: the subcommand parser to populate.
        split_size: default value for --split-size (the two subcommands
            historically use different defaults: 500 for score, 1000 for bin).
    """
    subparser.add_argument('--mode', type=str, choices=['ref', 'hyp'],
        help="Scoring references (.txt, .json 'refs') vs. hypotheses (.json 'hyp_*')")
    subparser.add_argument('--temp', type=float, default=1.0,
        help="softmax temperature")
    subparser.add_argument('--split-size', type=int, default=split_size,
        help="split size (per GPU)")
    subparser.add_argument('--no-mask', action='store_true',
        help="Instead of making masked copies, do not mask")
    _add_text_args(subparser)
    # Positional; must be added before any further positionals (e.g. bin's
    # counts_file/sums_file) so the parse order is preserved.
    subparser.add_argument('infile', nargs='?', type=argparse.FileType('rt'),
        help="File to score (.json = ESPNet JSON, otherwise newline-separated text). Loads whole file into memory!")


def main() -> None:
    """Defines arguments for all subcommands and dispatches to the chosen one."""
    parser = argparse.ArgumentParser(description="Masked Language Model Scoring")
    subparsers = parser.add_subparsers(help="Run 'mlm {subcommand} -h' for details")
    logging.basicConfig(
        format='%(asctime)s %(levelname)-8s %(message)s',
        level=logging.INFO,
        datefmt='%Y-%m-%d %H:%M:%S')

    # score
    parser_score = subparsers.add_parser('score', help='Scores JSON or TXT files of sentences')
    _shared_args(parser_score)
    _add_scoring_args(parser_score, split_size=500)
    parser_score.add_argument('--tgt', type=str, default='en',
        help="Code to use for language embeddings, where appropriate")
    parser_score.add_argument('--detok', action='store_true',
        help="perform Moses English detokenization on hypotheses before scoring")
    parser_score.add_argument('--per-token', action='store_true',
        help="output lists of per-token scores (slower)")
    parser_score.set_defaults(func=cmd_score)

    # bin (same arguments as score; when stable, make flag)
    parser_bin = subparsers.add_parser('bin', help='Computes bin statistics when scoring')
    _shared_args(parser_bin)
    _add_scoring_args(parser_bin, split_size=1000)
    parser_bin.add_argument('counts_file', nargs='?', type=str,
        help="where to dump the counts per bin")
    parser_bin.add_argument('sums_file', nargs='?', type=str,
        help="where to dump the sums per bin")
    parser_bin.set_defaults(func=cmd_bin)

    # rescore
    parser_rescore = subparsers.add_parser('rescore', help='Rescores two files together')
    _shared_args(parser_rescore)
    parser_rescore.add_argument('--weight', type=str, default='0.3',
        help="AM score is (1-sum(weight)), LM scores are weights delimited by commas")
    parser_rescore.add_argument('--ref-file', type=argparse.FileType('rt'),
        help="Specify an alternative reference file to FILE_AM")
    parser_rescore.add_argument('--ln', type=float, default=None,
        help="apply GNMT normalization with this scale to each >>LM<< score")
    parser_rescore.add_argument('--ln-type', type=str, choices=['gnmt', 'length'], default='gnmt',
        help="type of normalization to apply")
    parser_rescore.add_argument('file_am', type=argparse.FileType('rt'),
        help="File with AM scores (.json = JSON)")
    parser_rescore.add_argument('file_lm', type=str,
        help="File(s) with LM scores (.json = JSON), delimited by commas")
    parser_rescore.set_defaults(func=cmd_rescore)

    # finetune
    parser_finetune = subparsers.add_parser('finetune', help='Finetune to scoring without masks')
    _shared_args(parser_finetune)
    parser_finetune.add_argument('--corpus-dir', type=str, required=True,
        help="Directory of part.*")
    parser_finetune.add_argument('--score-dir', type=str, required=True,
        help="Directory of part.*.ref.scores")
    parser_finetune.add_argument('--output-dir', type=str, required=True,
        help="Directory to output .param files")
    parser_finetune.add_argument('--freeze', type=int, default=0,
        help="Number of initial layers to freeze")
    _add_text_args(parser_finetune)
    parser_finetune.add_argument('--split-size', type=int, default=1000,
        help="split size (per GPU)")
    parser_finetune.set_defaults(func=cmd_finetune)

    args = parser.parse_args()
    # On Python 3, subparsers are optional by default: with no subcommand,
    # args has no 'func' and the original code crashed with AttributeError.
    # Print usage and exit with the conventional argparse error status instead.
    if not hasattr(args, 'func'):
        parser.print_help()
        parser.exit(2)
    args.func(args)