def deparallelize()

in optimum/graphcore/models/mt5/modeling_mt5.py


    def deparallelize(self):
        """
        Undo the changes to the model done by `parallelize`.
        You should call this before `save_pretrained` so that the `model.state_dict` is
        fully compatible with `transformers.MT5ForConditionalGeneration`.
        """
        # MT5ForConditionalGeneration has a deparallelize method, so make sure that the PipelineMixin one is used here.
        PipelineMixin.deparallelize(self)

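        # Switch off the shared encoder/decoder embeddings computation set up by `parallelize`.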
        self.encoder_and_decoder_embeddings_computation(False)

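        # If the shared embedding was serialized (split up for the IPU), convert it back to a
        # regular embedding and re-point the encoder and decoder at it.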
        if self.shared.__class__ == SerializedEmbedding:
            self.shared = self.shared.to_model()
            self.encoder.embed_tokens = self.shared
            self.decoder.embed_tokens = self.shared

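        # Restore the original LM head if it was changed to an indexed-input linear layer.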
        self.change_lm_head_to_indexed_input_linear(restore=True)

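        # Convert a serialized or split LM head back to a regular linear layer, re-tying the
        # weights when the config ties them to the embeddings.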
        if self.lm_head.__class__ == SerializedLinear:
            self.lm_head = self.lm_head.to_model()
            if self.config.tie_word_embeddings:
                self.tie_weights()
        elif self.lm_head.__class__ == SplitProjection:
            self.lm_head = self.lm_head.to_model()

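        # Swap the encoder and decoder stacks back to the stock transformers classes.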
        self.encoder.__class__ = MT5Stack
        self.decoder.__class__ = MT5Stack

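        # Restore each encoder block: original class, unwrapped dropout modules, and original activation.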
        for block in self.encoder.block:
            block.__class__ = MT5Block
            block.layer[0].dropout = block.layer[0].dropout.module
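            # Re-apply the dropout wrapper's scale to the feed-forward output projection
            # weights, then unwrap the dropout.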
            with torch.no_grad():
                block.layer[1].DenseReluDense.wo.weight *= block.layer[1].dropout.scale
            block.layer[1].dropout = block.layer[1].dropout.module
            if self.config.dense_act_fn == "gelu_new":
                block.layer[1].DenseReluDense.act = NewGELUActivation()

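        # Restore each decoder block's original class and activation.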
        for block in self.decoder.block:
            block.__class__ = MT5Block
            if self.config.dense_act_fn == "gelu_new":
                block.layer[2].DenseReluDense.act = NewGELUActivation()

        return self
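
For reference, this method pairs with `parallelize`: transform the model for the IPU, run it, then deparallelize before saving. A minimal sketch, assuming the enclosing class is `PipelinedMT5ForConditionalGeneration` and that an `ipu_config` has already been attached as the library expects (IPU setup and training details are elided):

model = PipelinedMT5ForConditionalGeneration.from_pretrained("google/mt5-small")
model.parallelize()    # apply the IPU pipeline transformations (assumes ipu_config is attached)
# ... run training or inference on the IPU ...
model.deparallelize()  # undo them before saving
model.save_pretrained("mt5-checkpoint")  # checkpoint now loads into transformers.MT5ForConditionalGeneration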