in optimum/graphcore/models/deberta/modeling_deberta.py [0:0]
def parallelize(self):
    """
    Transform the model to run in an IPU pipeline.
    - Adds pipeline stages to the model
    - (If enabled) Replaces the word embedding with a SerializedEmbedding
    - Replaces several modules with IPU compatible counterparts
    - Adds recomputation checkpoints

    Returns:
        `self`, to allow chained calls.
    """
    # Hook handles collected here are removed again by deparallelize().
    self._hooks = []
    logger.info("-------------------- Device Allocation --------------------")
    logger.info("Embedding --> IPU 0")
    if self.ipu_config.embedding_serialization_factor > 1:
        if isinstance(self, PipelinedDebertaForMaskedLM):
            # For MLM, serialize the output projection (decoder) instead of the
            # input embedding, then re-tie so both keep sharing weights.
            self.cls.predictions.decoder = SerializedLinear.from_model(
                self.cls.predictions.decoder, self.ipu_config.embedding_serialization_factor
            )
            self.tie_weights()
        else:
            self.deberta.embeddings.word_embeddings = SerializedEmbedding.from_model(
                self.deberta.embeddings.word_embeddings, self.ipu_config.embedding_serialization_factor
            )
    # Swap in IPU-compatible module implementations (restore=False).
    self.change_modules_for_ipu(False)
    self.deberta.embeddings = poptorch.BeginBlock(self.deberta.embeddings, "Embedding", ipu_id=0)
    # Outline the embedding LayerNorm so identical code is reused on device.
    hs = outline_attribute(self.deberta.embeddings.LayerNorm, "embedding")
    self._hooks.extend(hs)
    self.deberta.encoder = poptorch.BeginBlock(self.deberta.encoder, ipu_id=0)
    if self.deberta.encoder.relative_attention:
        # Relative-position embeddings are shared by all layers; keep on IPU 0.
        self.deberta.encoder.rel_embeddings = poptorch.BeginBlock(self.deberta.encoder.rel_embeddings, ipu_id=0)
    layer_ipu = get_layer_ipu(self.ipu_config, self.deberta.encoder.layer)
    for index, layer in enumerate(self.deberta.encoder.layer):
        ipu = layer_ipu[index]
        # No checkpoint on the last layer: its activations feed the head directly.
        if self.ipu_config.recompute_checkpoint_every_layer and index != self.config.num_hidden_layers - 1:
            h = recomputation_checkpoint(layer)
            self._hooks.append(h)
        self.deberta.encoder.layer[index] = poptorch.BeginBlock(layer, f"Encoder{index}", ipu_id=ipu)
        logger.info(f"Encoder {index:<2} --> IPU {ipu}")
    if isinstance(self, PipelinedDebertaForMaskedLM):
        # BUG FIX: the previous log line interpolated the stale loop variable
        # `index`, printing the last encoder layer's index in the Projection line.
        logger.info("Projection --> IPU 0")
        self.cls.predictions.decoder = poptorch.BeginBlock(self.cls.predictions.decoder, "Projection", ipu_id=0)
    return self