def add_model_parameters()

in sockeye/arguments.py [0:0]


def add_model_parameters(params):
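    """Adds model configuration arguments to the given argument parser."""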
    model_params = params.add_argument_group("ModelConfig")

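    # parameter initialization arguments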
    model_params.add_argument('--params', '-p',
                              type=str,
                              default=None,
                              help='Initialize model parameters from file. Overrides random initializations.')
    model_params.add_argument('--allow-missing-params',
                              action="store_true",
                              default=False,
                              help="Allow missing parameters when initializing model parameters from file. "
                                   "Default: %(default)s.")
    model_params.add_argument('--ignore-extra-params',
                              action="store_true",
                              default=False,
                              help="Allow extra parameters when initializing model parameters from file. "
                                   "Default: %(default)s.")

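    # encoder & decoder arguments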
    model_params.add_argument('--encoder',
                              choices=C.ENCODERS,
                              default=C.TRANSFORMER_TYPE,
                              help="Type of encoder. Default: %(default)s.")
    model_params.add_argument('--decoder',
                              choices=C.DECODERS,
                              default=C.TRANSFORMER_TYPE,
                              help="Type of decoder. Default: %(default)s. "
                                   "'ssru_transformer' uses Simpler Simple Recurrent Units (Kim et al., 2019) "
                                   "as a replacement for self-attention layers.")

    model_params.add_argument('--num-layers',
                              type=multiple_values(num_values=2, greater_or_equal=1),
                              default=(6, 6),
                              help='Number of layers for encoder & decoder. '
                                   'Use "x:x" to specify separate values for encoder & decoder. Default: %(default)s.')

    # transformer arguments
    model_params.add_argument('--transformer-model-size',
                              type=multiple_values(num_values=2, greater_or_equal=1),
                              default=(512, 512),
                              help='Number of hidden units in transformer layers. '
                                   'Use "x:x" to specify separate values for encoder & decoder. Default: %(default)s.')
    model_params.add_argument('--transformer-attention-heads',
                              type=multiple_values(num_values=2, greater_or_equal=1),
                              default=(8, 8),
                              help='Number of heads for all self-attention layers when using transformers. '
                                   'Use "x:x" to specify separate values for encoder & decoder. Default: %(default)s.')
    model_params.add_argument('--transformer-feed-forward-num-hidden',
                              type=multiple_values(num_values=2, greater_or_equal=1),
                              default=(2048, 2048),
                              help='Number of hidden units in transformer feed forward layers. '
                                   'Use "x:x" to specify separate values for encoder & decoder. Default: %(default)s.')
    model_params.add_argument('--transformer-feed-forward-use-glu',
                              action='store_true',
                              default=False,
                              help='Use Gated Linear Units in transformer feed forward networks (Dauphin et al. 2016, '
                                   'arxiv.org/abs/1612.08083; Shazeer 2020, arxiv.org/abs/2002.05202). Default: '
                                   '%(default)s.')
    model_params.add_argument('--transformer-activation-type',
                              type=multiple_values(num_values=2, greater_or_equal=None, data_type=str),
                              default=(C.RELU, C.RELU),
                              help='Type of activation to use for each feed forward layer. Use "x:x" to specify '
                                   'different values for encoder & decoder. Supported: {}. Default: '
                                   '%(default)s.'.format(' '.join(C.TRANSFORMER_ACTIVATION_TYPES)))
    model_params.add_argument('--transformer-positional-embedding-type',
                              choices=C.POSITIONAL_EMBEDDING_TYPES,
                              default=C.FIXED_POSITIONAL_EMBEDDING,
                              help='The type of positional embedding. Default: %(default)s.')
    model_params.add_argument('--transformer-preprocess',
                              type=multiple_values(num_values=2, greater_or_equal=None, data_type=str),
                              default=('n', 'n'),
                              help='Transformer preprocess sequence for encoder and decoder. Supports three types of '
                                   'operations: d=dropout, r=residual connection, n=layer normalization. You can '
                                   'combine in any order, for example: "ndr". '
                                   'Leave empty to not use any of these operations. '
                                   'You can specify separate sequences for encoder and decoder by separating them with ":". '
                                   'For example: "n:drn". '
                                   'Default: %(default)s.')
    model_params.add_argument('--transformer-postprocess',
                              type=multiple_values(num_values=2, greater_or_equal=None, data_type=str),
                              default=('dr', 'dr'),
                              help='Transformer postprocess sequence for encoder and decoder. Supports three types of '
                                   'operations: d=dropout, r=residual connection, n=layer normalization. You can '
                                   'combine in any order, for example: "ndr". '
                                   'Leave empty to not use any of these operations. '
                                   'You can specify separate sequences for encoder and decoder by separating them with ":". '
                                   'For example: "n:drn". '
                                   'Default: %(default)s.')

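    # LHUC arguments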
    model_params.add_argument('--lhuc',
                              nargs="+",
                              default=None,
                              choices=C.LHUC_CHOICES,
                              metavar="COMPONENT",
                              help="Use LHUC (Vilar 2018). Adds an amplitude parameter to hidden units for"
                              " domain adaptation. Requires a pre-trained model. Valid values: {values}."
                              " Default: %(default)s.".format(
                                  values=", ".join(C.LHUC_CHOICES)))

    # embedding arguments
    model_params.add_argument('--num-embed',
                              type=multiple_values(num_values=2, greater_or_equal=1),
                              default=(None, None),
                              help='Embedding size for source and target tokens. '
                                   'Use "x:x" to specify separate values for src&tgt. Default: %d.' % C.DEFAULT_NUM_EMBED)
    model_params.add_argument('--source-factors-num-embed',
                              type=int,
                              nargs='+',
                              default=[],
                              help='Embedding size for additional source factors. '
                                   'You must provide as many dimensions as '
                                   '(validation) source factor files. Default: %(default)s.')
    model_params.add_argument('--target-factors-num-embed',
                              type=int,
                              nargs='+',
                              default=[],
                              help='Embedding size for additional target factors. '
                                   'You must provide as many dimensions as '
                                   '(validation) target factor files. Default: %(default)s.')
    model_params.add_argument('--source-factors-combine', '-sfc',
                              choices=C.FACTORS_COMBINE_CHOICES,
                              default=[C.FACTORS_COMBINE_SUM],
                              nargs='+',
                              help='How to combine source factors. Can be either one value which will be applied to '
                                   'all source factors, or a list of values. Default: %(default)s.')
    model_params.add_argument('--target-factors-combine', '-tfc',
                              choices=C.FACTORS_COMBINE_CHOICES,
                              default=[C.FACTORS_COMBINE_SUM],
                              nargs='+',
                              help='How to combine target factors. Can be either one value which will be applied to '
                                   'all target factors, or a list of values. Default: %(default)s.')
    model_params.add_argument('--source-factors-share-embedding',
                              type=bool_str(),
                              nargs='+',
                              default=[False],
                              help='Share the embeddings with the source language. '
                                   'Can be either one value which will be applied '
                                   'to all source factors, or a list of values. Default: %(default)s.')
    model_params.add_argument('--target-factors-share-embedding',
                              type=bool_str(),
                              nargs='+',
                              default=[False],
                              help='Share the embeddings with the target language. '
                                   'Can be either one value which will be applied '
                                   'to all target factors, or a list of values. Default: %(default)s.')

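    # weight tying arguments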
    model_params.add_argument('--weight-tying-type',
                              default=C.WEIGHT_TYING_SRC_TRG_SOFTMAX,
                              choices=C.WEIGHT_TYING_TYPES,
                              help='The type of weight tying: source embeddings=src, target embeddings=trg, '
                                   'target softmax weight matrix=softmax. Default: %(default)s.')

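    # data type and mixed precision arguments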
    model_params.add_argument('--dtype', default=C.DTYPE_FP32, choices=[C.DTYPE_FP32, C.DTYPE_FP16],
                              help="Data type. Default: %(default)s.")

    model_params.add_argument('--amp',
                              action='store_true',
                              help='Use PyTorch automatic mixed precision (AMP) to run compatible operations in '
                                   'float16 mode instead of float32.')
    model_params.add_argument('--apex-amp',
                              action='store_true',
                              help='Use NVIDIA Apex automatic mixed precision (AMP) to run the entire model in float16 '
                                   'mode with float32 master weights and dynamic loss scaling. This is faster than '
                                   'PyTorch AMP with some additional risk and requires installing Apex: '
                                   'https://github.com/NVIDIA/apex')
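
A minimal usage sketch (not part of sockeye/arguments.py; the example CLI values and the single-value broadcast behaviour are assumptions based on the help strings above): the "ModelConfig" group is attached to a standard argparse.ArgumentParser, and colon-separated "x:x" values parse into per-side (encoder, decoder) tuples.

import argparse

from sockeye import arguments
from sockeye import constants as C

# Build a parser and attach the "ModelConfig" group defined by add_model_parameters.
parser = argparse.ArgumentParser(description="model-config sketch")
arguments.add_model_parameters(parser)

# "x:x" is split into an (encoder, decoder) tuple by multiple_values();
# a single value is assumed to apply to both sides.
args = parser.parse_args([
    '--encoder', C.TRANSFORMER_TYPE,
    '--num-layers', '6:6',
    '--transformer-attention-heads', '8:8',
    '--transformer-model-size', '512',
])
print(args.num_layers)              # (6, 6)
print(args.transformer_model_size)  # assumed (512, 512)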