in optimum/graphcore/models/deberta/modeling_deberta.py [0:0]
def change_modules_for_ipu(self, restore: bool) -> None:
    """Swap DeBERTa submodules between their stock and IPU variants, in place.

    Walks every module yielded by ``self.modules()`` and re-points its
    ``__class__`` (plus the instance attributes the target class reads)
    instead of constructing replacement modules, so the existing module
    instances are kept.

    Args:
        restore: If ``False``, convert to the IPU variants
            (``IPUDisentangledSelfAttention``, ``nn.Dropout``,
            ``_get_rel_embedding``). If ``True``, revert to the stock
            classes (``DisentangledSelfAttention``, ``StableDropout``,
            ``DebertaEncoder.get_rel_embedding``). Reverting a model that
            was never converted, or reverting twice, is a no-op for the
            ``xsoftmax`` attribute instead of an error.
    """
    for mod in self.modules():
        if isinstance(mod, DisentangledSelfAttention):
            mod.__class__ = DisentangledSelfAttention if restore else IPUDisentangledSelfAttention
            if restore:
                # ``xsoftmax`` only exists after a conversion. Tolerate its
                # absence so a repeated or unmatched restore does not abort
                # the loop half-way with an AttributeError.
                try:
                    del mod.xsoftmax
                except AttributeError:
                    pass
            else:
                mod.xsoftmax = XSoftmax(-1)

        if restore:
            # NOTE(review): this turns *every* nn.Dropout into StableDropout,
            # not only the ones a previous conversion produced -- confirm the
            # model holds no genuine nn.Dropout modules.
            if isinstance(mod, nn.Dropout):
                mod.__class__ = StableDropout
                mod.drop_prob = mod.p
                # Presumably the state StableDropout.__init__ would have set;
                # verify against the StableDropout definition.
                mod.count = 0
                mod.context_stack = None
        elif isinstance(mod, StableDropout):
            mod.__class__ = nn.Dropout
            # __init__ is not re-run after the class swap, so set the
            # attributes nn.Dropout expects by hand.
            mod.p = mod.drop_prob
            mod.inplace = False

        if isinstance(mod, DebertaEncoder):
            func = DebertaEncoder.get_rel_embedding if restore else _get_rel_embedding
            # Bind the plain function to this instance so the override
            # applies to ``mod`` only, not to the DebertaEncoder class.
            mod.get_rel_embedding = func.__get__(mod, DebertaEncoder)