in fairseq/models/lightconv_lm.py [0:0]
def add_args(parser):
    """Add model-specific arguments to the parser.

    Registers the dropout, decoder-shape, adaptive softmax / adaptive input,
    character-embedding and LightConv/DynamicConv command-line options on
    ``parser`` via ``parser.add_argument``.

    Args:
        parser: an ``argparse``-style parser (anything exposing
            ``add_argument``); it is mutated in place.

    Returns:
        None.

    Note:
        Many options deliberately declare no ``default``; with ``argparse``
        they therefore parse to ``None`` unless given on the command line.
        Presumably the real defaults are filled in elsewhere -- that code is
        not visible from this function.

        ``utils`` (providing ``eval_str_list`` and ``eval_bool``) must be
        available in the enclosing module's scope.
    """
    parser.add_argument(
        "--dropout",
        default=0.1,
        type=float,
        metavar="D",
        help="dropout probability",
    )
    parser.add_argument(
        "--attention-dropout",
        default=0.0,
        type=float,
        metavar="D",
        help="dropout probability for attention weights",
    )
    parser.add_argument(
        "--relu-dropout",
        default=0.0,
        type=float,
        metavar="D",
        help="dropout probability after ReLU in FFN",
    )
    parser.add_argument(
        "--input-dropout",
        type=float,
        metavar="D",
        help="dropout probability of the inputs",
    )
    parser.add_argument(
        "--decoder-embed-dim",
        type=int,
        metavar="N",
        help="decoder embedding dimension",
    )
    parser.add_argument(
        "--decoder-output-dim",
        type=int,
        metavar="N",
        help="decoder output dimension",
    )
    parser.add_argument(
        "--decoder-input-dim", type=int, metavar="N", help="decoder input dimension"
    )
    parser.add_argument(
        "--decoder-ffn-embed-dim",
        type=int,
        metavar="N",
        help="decoder embedding dimension for FFN",
    )
    parser.add_argument(
        "--decoder-layers", type=int, metavar="N", help="num decoder layers"
    )
    parser.add_argument(
        "--decoder-attention-heads",
        type=int,
        metavar="N",
        help="num decoder attention heads or LightConv/DynamicConv heads",
    )
    parser.add_argument(
        "--decoder-normalize-before",
        default=False,
        action="store_true",
        help="apply layernorm before each decoder block",
    )
    parser.add_argument(
        "--adaptive-softmax-cutoff",
        metavar="EXPR",
        help="comma separated list of adaptive softmax cutoff points. "
        "Must be used with adaptive_loss criterion",
    )
    parser.add_argument(
        "--adaptive-softmax-dropout",
        type=float,
        metavar="D",
        help="sets adaptive softmax dropout for the tail projections",
    )
    # Help text previously read "adaptive input factor" (copy-pasted from
    # --adaptive-input-factor); it now names the option it belongs to.
    parser.add_argument(
        "--adaptive-softmax-factor",
        type=float,
        metavar="N",
        help="adaptive softmax factor",
    )
    parser.add_argument(
        "--no-token-positional-embeddings",
        default=False,
        action="store_true",
        help="if set, disables positional embeddings (outside self attention)",
    )
    parser.add_argument(
        "--share-decoder-input-output-embed",
        default=False,
        action="store_true",
        help="share decoder input and output embeddings",
    )
    parser.add_argument(
        "--character-embeddings",
        default=False,
        action="store_true",
        help="if set, uses character embedding convolutions to produce token embeddings",
    )
    # The value is kept as a raw string here (type=str); whoever consumes the
    # option is responsible for turning it into a list.
    # Help text previously duplicated the one of --character-embedding-dim.
    parser.add_argument(
        "--character-filters",
        type=str,
        metavar="LIST",
        default="[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]",
        help="list of character convolution filters",
    )
    parser.add_argument(
        "--character-embedding-dim",
        type=int,
        metavar="N",
        default=4,
        help="size of character embeddings",
    )
    parser.add_argument(
        "--char-embedder-highway-layers",
        type=int,
        metavar="N",
        default=2,
        help="number of highway layers for character token embedder",
    )
    parser.add_argument(
        "--adaptive-input",
        default=False,
        action="store_true",
        help="if set, uses adaptive input",
    )
    parser.add_argument(
        "--adaptive-input-factor",
        type=float,
        metavar="N",
        help="adaptive input factor",
    )
    parser.add_argument(
        "--adaptive-input-cutoff",
        metavar="EXPR",
        help="comma separated list of adaptive input cutoff points.",
    )
    parser.add_argument(
        "--tie-adaptive-weights",
        action="store_true",
        help="if set, ties the weights of adaptive softmax and adaptive input",
    )
    parser.add_argument(
        "--tie-adaptive-proj",
        action="store_true",
        help="if set, ties the projection weights of adaptive softmax and adaptive input",
    )
    parser.add_argument(
        "--decoder-learned-pos",
        action="store_true",
        help="use learned positional embeddings in the decoder",
    )

    # LightConv and DynamicConv arguments
    parser.add_argument(
        "--decoder-kernel-size-list",
        # Lambda defers the ``utils`` lookup until a value is actually parsed.
        type=lambda x: utils.eval_str_list(x, int),
        help='list of kernel size (default: "[3,7,15,31,31,31]")',
    )
    parser.add_argument(
        "--decoder-glu", type=utils.eval_bool, help="glu after in proj"
    )
    parser.add_argument(
        "--decoder-conv-type",
        default="dynamic",
        type=str,
        choices=["dynamic", "lightweight"],
        help="type of convolution",
    )
    parser.add_argument("--weight-softmax", default=True, type=utils.eval_bool)
    parser.add_argument(
        "--weight-dropout",
        type=float,
        metavar="D",
        help="dropout probability for conv weights",
    )