def create_arg_parser()

in learn_bpe.py [0:0]


def create_arg_parser(subparsers=None):
    """
    Create the argument parser.

    Copied from the original implementation to be command-line compatible.

    Args:
        subparsers: Optional object exposing ``add_parser`` (e.g. the value
            returned by ``ArgumentParser.add_subparsers()``). When given, the
            options are registered on a new ``learn-bpe`` sub-command;
            otherwise a standalone parser is created.

    Returns:
        The parser (standalone or sub-command) with all options registered.
    """
    description = "learn BPE-based word segmentation"
    # Explicit None check: do not depend on the truthiness of the passed object.
    if subparsers is not None:
        parser = subparsers.add_parser(
            'learn-bpe',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=description)
    else:
        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=description)

    # With nargs="+" a supplied value replaces the default list (no append),
    # so the list default is never mutated by argparse.
    parser.add_argument(
        '--input', '-i', type=argparse.FileType('r'), default=[sys.stdin],
        metavar='PATH', nargs="+",
        help="Input text(s) (default: standard input).")

    parser.add_argument(
        '--probabilistic', '-p', action="store_true",
        help="Use probabilistic BPE")
    parser.add_argument(
        '--frac-stopping', '-fs', type=float, default=0.0,
        help="(Probabilistic) Stop when the likelihood increase falls below "
             "this fraction of the initial one (default: %(default)s)")
    parser.add_argument(
        '--frac-stopping-average', '-fsa', type=int, default=5,
        help='"Mini-batch" size for frac-stopping computation (default: %(default)s)')
    parser.add_argument(
        '--output', '-o', type=argparse.FileType('w'), default=sys.stdout,
        metavar='PATH',
        help="Output file for BPE codes (default: standard output)")
    parser.add_argument(
        '--symbols', '-s', type=int, default=10000,
        help="Create this many new symbols (each representing a character "
             "n-gram) (default: %(default)s)")
    parser.add_argument(
        '--min-frequency', type=int, default=2, metavar='FREQ',
        help='Stop if no symbol pair has frequency >= FREQ (default: %(default)s)')
    parser.add_argument(
        '--dict-input', action="store_true",
        help="If set, input file is interpreted as a dictionary where each "
             "line contains a word-count pair")
    parser.add_argument(
        '--total-symbols', '-t', action="store_true",
        help="Subtract number of characters from the symbols to be generated "
             "(so that '--symbols' becomes an estimate for the total number "
             "of symbols needed to encode text).")
    parser.add_argument(
        '--verbose', '-v', action="store_true",
        help="verbose mode.")

    return parser