in pytorch_translate/options.py [0:0]
def expand_optimization_args(group):
    """Expands the optimization related arguments with pytorch_translate
    specific arguments"""
    # Each entry is (flag, kwargs-for-add_argument). Registered in list
    # order so the --help output ordering is unchanged.
    option_specs = [
        (
            "--local-num-gpus",
            dict(
                # Evaluated once at definition time: defaults to every GPU
                # currently visible on this machine.
                default=torch.cuda.device_count(),
                type=int,
                metavar="N",
                help=(
                    "The number of local GPUs to use for training on this machine. "
                    "Defaults to using all visible GPUs. This should be "
                    "<= --distributed-world-size."
                ),
            ),
        ),
        (
            "--stop-time-hr",
            dict(
                default=-1.0,
                type=float,
                metavar="N",
                help="Stops training after N hours have elapsed. Use decimal values "
                "for sub-hourly granularity. A value of < 0 disables this.",
            ),
        ),
        (
            "--stop-no-best-validate-loss",
            dict(
                default=-1,
                type=int,
                metavar="N",
                help="Stops training after N validations have been run without "
                "achieving a better loss than before. Note that this is affected by "
                "--save-interval-updates in how frequently we run validation in the "
                "first place. A value of < 0 disables this.",
            ),
        ),
        (
            "--stop-no-best-bleu-eval",
            dict(
                default=-1,
                type=int,
                metavar="N",
                help="Stops training after N evals have been run without "
                "achieving a better BLEU score than before. Note that this is affected "
                "by --save-interval-updates in how frequently we run BLEU eval "
                "in the first place. A value of < 0 disables this.",
            ),
        ),
        (
            "--shrink-lr-no-best-tune-loss",
            dict(
                default=5,
                type=int,
                metavar="N",
                help="Decay learning rate after N evals have been run without "
                "achieving a lower tune loss than before. This is to achieve "
                "decay lr within an epoch, independent of lr_scheduler. "
                "Note that this is affected by --save-interval-updates in "
                "how frequently we run BLEU eval in the first place. "
                "A value of < 0 disables this.",
            ),
        ),
        (
            "--pruning-percentile",
            dict(
                type=int,
                default=0,
                help="Proportion of weights to prune. A value <=0 disables pruning."
                " By default, prunes weights uniformly and ignores bias terms.",
            ),
        ),
        (
            "--parameters-to-prune",
            dict(
                default="all",
                help="Names of layers to prune. Layers are pruned if the argument is "
                "a substring of the layer name. Options are 'all', 'embed', 'lstm'. ",
            ),
        ),
        (
            "--loss-beam",
            dict(
                type=int,
                default=0,
                help="Beam size to use for 'sequence_nll' loss and 'sequence_risk' "
                "loss. If zero, use --beam.",
            ),
        ),
        (
            "--disable-eval-bleu",
            dict(
                # Bare "--disable-eval-bleu" means True; an explicit value is
                # parsed through the project's bool_flag converter.
                nargs="?",
                const=True,
                default=False,
                type=utils.bool_flag,
                help=("disable bleu score evaluation on tune dataset"),
            ),
        ),
    ]
    for flag, kwargs in option_specs:
        group.add_argument(flag, **kwargs)
    return group