pytorch_translate/research/knowledge_distillation/dual_decoder_kd_model.py [43:98]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        parser.add_argument(
            "--student-decoder-embed-dim",
            type=int,
            metavar="N",
            help="[student RNN] decoder embedding dimension",
        )
        parser.add_argument(
            "--student-decoder-layers",
            type=int,
            metavar="N",
            help="[student RNN] num decoder layers",
        )
        parser.add_argument(
            "--student-decoder-attention-heads",
            type=int,
            metavar="N",
            help="[student RNN] num decoder attention heads",
        )
        parser.add_argument(
            "--student-decoder-lstm-units",
            type=int,
            metavar="N",
            help="[student RNN] num LSTM units for each decoder layer",
        )
        parser.add_argument(
            "--student-decoder-out-embed-dim",
            type=int,
            metavar="N",
            help="[student RNN] decoder output embedding dimension",
        )
        parser.add_argument(
            "--student-decoder-reduced-attention-dim",
            type=int,
            default=None,
            metavar="N",
            help="if specified, computes attention with this dimensionality "
            "in the student decoder (instead of using encoder output dims)",
        )

    @classmethod
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted
        # (in case there are any new ones)
        base_architecture(args)

        src_dict, tgt_dict = task.source_dictionary, task.target_dictionary

        encoder_embed_tokens = pytorch_translate_transformer.build_embedding(
            dictionary=src_dict,
            embed_dim=args.encoder_embed_dim,
            path=args.encoder_pretrained_embed,
            freeze=args.encoder_freeze_embed,
        )

        teacher_decoder_embed_tokens = pytorch_translate_transformer.build_embedding(
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



pytorch_translate/research/knowledge_distillation/hybrid_dual_decoder_kd_model.py [43:98]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        parser.add_argument(
            "--student-decoder-embed-dim",
            type=int,
            metavar="N",
            help="[student RNN] decoder embedding dimension",
        )
        parser.add_argument(
            "--student-decoder-layers",
            type=int,
            metavar="N",
            help="[student RNN] num decoder layers",
        )
        parser.add_argument(
            "--student-decoder-attention-heads",
            type=int,
            metavar="N",
            help="[student RNN] num decoder attention heads",
        )
        parser.add_argument(
            "--student-decoder-lstm-units",
            type=int,
            metavar="N",
            help="[student RNN] num LSTM units for each decoder layer",
        )
        parser.add_argument(
            "--student-decoder-out-embed-dim",
            type=int,
            metavar="N",
            help="[student RNN] decoder output embedding dimension",
        )
        parser.add_argument(
            "--student-decoder-reduced-attention-dim",
            type=int,
            default=None,
            metavar="N",
            help="if specified, computes attention with this dimensionality "
            "in the student decoder (instead of using encoder output dims)",
        )

    @classmethod
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted
        # (in case there are any new ones)
        base_architecture(args)

        src_dict, tgt_dict = task.source_dictionary, task.target_dictionary

        encoder_embed_tokens = pytorch_translate_transformer.build_embedding(
            dictionary=src_dict,
            embed_dim=args.encoder_embed_dim,
            path=args.encoder_pretrained_embed,
            freeze=args.encoder_freeze_embed,
        )

        teacher_decoder_embed_tokens = pytorch_translate_transformer.build_embedding(
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
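
Both excerpts register the same six --student-decoder-* options and open build_model() identically, so the flagged duplication could be removed by moving the shared argument registration into a single module-level helper that both model classes call from their add_args() classmethods. The sketch below is a minimal illustration of that refactor, assuming an argparse-compatible parser object; the helper name add_student_decoder_args is hypothetical and is not existing pytorch_translate API.

import argparse


def add_student_decoder_args(parser: argparse.ArgumentParser) -> None:
    # Hypothetical shared helper: registers the "[student RNN]" decoder options
    # that dual_decoder_kd_model.py and hybrid_dual_decoder_kd_model.py
    # currently declare verbatim in their own add_args() classmethods.
    parser.add_argument(
        "--student-decoder-embed-dim",
        type=int,
        metavar="N",
        help="[student RNN] decoder embedding dimension",
    )
    parser.add_argument(
        "--student-decoder-layers",
        type=int,
        metavar="N",
        help="[student RNN] num decoder layers",
    )
    # The remaining --student-decoder-* options from the excerpts above
    # (attention heads, LSTM units, output embed dim) would be registered
    # here in exactly the same way.
    parser.add_argument(
        "--student-decoder-reduced-attention-dim",
        type=int,
        default=None,
        metavar="N",
        help="if specified, computes attention with this dimensionality "
        "in the student decoder (instead of using encoder output dims)",
    )

Each model's add_args() would then reduce to a single add_student_decoder_args(parser) call for these options, and the shared opening of build_model() (defaulting args via base_architecture and building the encoder/teacher-decoder embeddings) could likewise be delegated to a common helper.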



