in optimum/graphcore/generation/utils.py [0:0]
    def forward(self, t, beam_idx=None, **model_inputs):
        """
        Args:
            t : (`torch.Tensor(int)`) Tensor with single int representing the current length of the sequence being generated
            beam_idx: (`torch.LongTensor` of shape `(batch_size * num_beams,)`):
                Beam indices indicating to which beam the tokens were added, required for reordering the on-device KV cache.
            model_inputs : Regular model_inputs passed to the wrapped model.
        Returns:
            The output logits at position `t` only
        """
        for module in self._modules_with_attributes_in_buffers["_generation_step"]:
            module._generation_step.copy_(t)
        # When generation is done on host, the beam_idx has to be provided as an input.
        # When generation is done on device, the beam_idx is stored in a separate buffer.
        if beam_idx is None:
            if hasattr(self.pipelined_model, "generation_strategy") and hasattr(
                self.pipelined_model.generation_strategy, "_cached_beam_idx"
            ):
                beam_idx = self.pipelined_model.generation_strategy._cached_beam_idx.int()
        for module in self._modules_with_attributes_in_buffers["_beam_idx"]:
            if beam_idx is None:
                raise ValueError(
                    "A module registered a `beam_idx` buffer, but the pipelined model is not called with such, "
                    "or the on device beam search did not register `_cached_beam_idx`. For the first case, "
                    "`beam_idx` can be provided to the model via `prepare_inputs_for_generation`."
                )
            module._beam_idx.copy_(beam_idx)
        # Run the decoder
        kwargs = self._get_buffered_outputs()
        outputs = self.pipelined_model(**model_inputs, **kwargs)
        if isinstance(outputs, ModelOutput) and not isinstance(outputs, OnDeviceGenerationModelOutput):
            outputs = type(outputs)(
                logits=outputs.logits,
            )
        return outputs