in optimum/graphcore/pipelines/__init__.py [0:0]
def new_forward(self, model_inputs, *args, **kwargs):
    """IPU-aware replacement for the pipeline's `forward`.

    Two IPU-specific behaviors are layered on top of `old_forward`:

    1. For non-generation models running on a compiled
       `poptorch.PoplarExecutor`, compare the batch size the executable was
       compiled with against the batch size of the incoming inputs; on a
       mismatch, destroy the executor so it is recompiled for the new batch
       size on the next run.
    2. When `fp16` is enabled, cast every float32 tensor input to float16
       before delegating.

    NOTE: relies on closure variables from the enclosing scope:
    `for_generation`, `fp16`, and `old_forward` (the wrapped implementation).

    Args:
        model_inputs: dict of pipeline inputs; tensor values may be cast in place.
        *args, **kwargs: forwarded unchanged to `old_forward`.

    Returns:
        Whatever `old_forward` returns.
    """
    if isinstance(self.model, poptorch.PoplarExecutor) and not for_generation:
        # For non-text generation models, support batch size changes.
        poplar_executor = self.model
        if poplar_executor._executable_inputs:
            # Batch size the executable was compiled with (first tensor arg).
            compiled_bs = next(
                (
                    arg.shape[0]
                    for arg in poplar_executor._executable_inputs.args
                    if isinstance(arg, torch.Tensor)
                ),
                None,
            )
            # Batch size of the incoming inputs (first tensor value).
            input_bs = next(
                (
                    value.shape[0]
                    for value in model_inputs.values()
                    if isinstance(value, torch.Tensor)
                ),
                None,
            )
            # Guard: the original for/break scans left compiled_bs / input_bs
            # unbound (NameError) when no tensor was found; skip instead.
            if compiled_bs is not None and input_bs is not None and compiled_bs != input_bs:
                # Force recompilation at the new batch size.
                poplar_executor.destroy()
    if isinstance(self.model, (poptorch.PoplarExecutor, IPUGenerationMixin)):
        if fp16:
            # Support fp16: cast float32 inputs to half precision.
            for key, value in model_inputs.items():
                if isinstance(value, torch.Tensor) and value.dtype == torch.float32:
                    model_inputs[key] = value.half()
    return old_forward(self, model_inputs, *args, **kwargs)