in learn_bpe.py [0:0]
def create_arg_parser(subparsers=None):
    """
    Create the argument parser for the BPE learning command.

    Copied from the original implementation to be command-line compatible.

    Args:
        subparsers: Optional sub-parser collection (an object exposing
            ``add_parser``, such as the value returned by
            ``argparse.ArgumentParser.add_subparsers``). When given, the
            options are registered on a new ``learn-bpe`` sub-command;
            otherwise a standalone ``argparse.ArgumentParser`` is built.

    Returns:
        The parser (standalone or sub-command) carrying all options.
    """
    # Both construction paths share the same presentation settings.
    parser_kwargs = {
        'formatter_class': argparse.RawDescriptionHelpFormatter,
        'description': "learn BPE-based word segmentation",
    }
    if subparsers:
        parser = subparsers.add_parser('learn-bpe', **parser_kwargs)
    else:
        parser = argparse.ArgumentParser(**parser_kwargs)

    # nargs="+" yields a list of open files, so the default is a list too.
    parser.add_argument(
        '--input', '-i', type=argparse.FileType('r'), default=[sys.stdin],
        metavar='PATH', nargs="+",
        help="Input text(s) (default: standard input).")
    parser.add_argument(
        '--probabilistic', '-p', action="store_true",
        help="Use probabilistic BPE")
    parser.add_argument(
        '--frac-stopping', '-fs', type=float, default=0.0,
        help="(Probabilistic) Stop when the likelihood increase falls "
             "below this fraction of the initial one")
    parser.add_argument(
        '--frac-stopping-average', '-fsa', type=int, default=5,
        help='"Mini-batch" size for frac-stopping computation '
             '(default: %(default)s)')
    parser.add_argument(
        '--output', '-o', type=argparse.FileType('w'), default=sys.stdout,
        metavar='PATH',
        help="Output file for BPE codes (default: standard output)")
    parser.add_argument(
        '--symbols', '-s', type=int, default=10000,
        help="Create this many new symbols (each representing a character "
             "n-gram) (default: %(default)s)")
    parser.add_argument(
        '--min-frequency', type=int, default=2, metavar='FREQ',
        help='Stop if no symbol pair has frequency >= FREQ '
             '(default: %(default)s)')
    parser.add_argument(
        '--dict-input', action="store_true",
        help="If set, input file is interpreted as a dictionary where each "
             "line contains a word-count pair")
    parser.add_argument(
        '--total-symbols', '-t', action="store_true",
        help="Subtract number of characters from the symbols to be generated "
             "(so that '--symbols' becomes an estimate for the total number "
             "of symbols needed to encode text).")
    parser.add_argument(
        '--verbose', '-v', action="store_true",
        help="verbose mode.")
    return parser