in optimum/exporters/neuron/model_configs.py [0:0]
def outputs(self) -> List[str]:
    """Return the ordered list of output names.

    The list is assembled in this order:

    1. Token-selection outputs: ``next_token_scores``, ``next_tokens`` and
       ``next_indices`` when ``self.num_beams > 1``, otherwise only
       ``next_tokens``.
    2. Cache names, grouped by kind rather than by layer: every
       ``past.{i}.self.key``, then every ``past.{i}.self.value``, then every
       ``past.{i}.cross.key``, then every ``past.{i}.cross.value``, with ``i``
       running over ``self._config.num_decoder_layers``.
    3. ``decoder_hidden_state.{i}`` names (one more than the number of decoder
       layers) when ``self.output_hidden_states`` is truthy.
    4. ``decoder_attention.{i}`` names when ``self.output_attentions`` is
       truthy, followed by ``cross_attention.{i}`` names when the config's
       ``is_encoder_decoder`` attribute is exactly ``True``.

    Returns:
        The output names, in the order described above.
    """
    if self.num_beams > 1:
        names = ["next_token_scores", "next_tokens", "next_indices"]
    else:
        names = ["next_tokens"]

    layers = range(self._config.num_decoder_layers)

    # One full pass over the layers per cache kind, so names of the same kind
    # stay contiguous in the resulting list.
    names.extend(f"past.{layer}.self.key" for layer in layers)
    names.extend(f"past.{layer}.self.value" for layer in layers)
    names.extend(f"past.{layer}.cross.key" for layer in layers)
    names.extend(f"past.{layer}.cross.value" for layer in layers)

    if self.output_hidden_states:
        # Flattened hidden states: one per decoder layer, +1 for the embedding layer.
        names.extend(f"decoder_hidden_state.{layer}" for layer in range(len(layers) + 1))

    if self.output_attentions:
        # Flattened attention tensors of every decoder layer.
        names.extend(f"decoder_attention.{layer}" for layer in layers)
        # Identity check on purpose: a missing or merely truthy flag does not count.
        if getattr(self._config, "is_encoder_decoder", False) is True:
            names.extend(f"cross_attention.{layer}" for layer in layers)

    return names