in optimum/onnxruntime/modeling_seq2seq.py [0:0]
def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, **kwargs) -> BaseModelOutput:
    use_torch = isinstance(input_ids, torch.Tensor)
    # IO binding only supports torch tensors; fail fast on numpy inputs.
    self.raise_on_numpy_input_io_binding(use_torch)

    model_inputs = {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
    }

    if self.use_io_binding:
        # Bind inputs and preallocated output buffers directly to the session
        # to avoid extra host<->device copies.
        output_shapes, output_buffers = self._prepare_io_binding(model_inputs)

        if self.device.type == "cpu":
            self.session.run_with_iobinding(self._io_binding)
        else:
            # On non-CPU devices, synchronize around the run so the bound
            # device buffers are valid before and after execution.
            self._io_binding.synchronize_inputs()
            self.session.run_with_iobinding(self._io_binding)
            self._io_binding.synchronize_outputs()

        last_hidden_state = output_buffers["last_hidden_state"].view(output_shapes["last_hidden_state"])
    else:
        # Standard path: convert inputs, run the session, and convert outputs back.
        onnx_inputs = self._prepare_onnx_inputs(use_torch, model_inputs)
        onnx_outputs = self.session.run(None, onnx_inputs)
        model_outputs = self._prepare_onnx_outputs(use_torch, onnx_outputs)
        last_hidden_state = model_outputs["last_hidden_state"]

    return BaseModelOutput(last_hidden_state=last_hidden_state)
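# Illustrative usage sketch (not part of the source file): one common way this
# encoder forward() is reached, via optimum's seq2seq wrapper. The checkpoint
# name and the `export=True` export flag are assumptions for the example.
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("t5-small")
model = ORTModelForSeq2SeqLM.from_pretrained("t5-small", export=True)

inputs = tokenizer("translate English to French: Hello", return_tensors="pt")
# Calling the wrapped encoder runs the forward() above and returns a
# BaseModelOutput whose last_hidden_state feeds the decoder during generation.
encoder_outputs = model.encoder(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
)
print(encoder_outputs.last_hidden_state.shape)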