in optimum/graphcore/pipelines/automatic_speech_recognition.py [0:0]
def _forward(self, model_inputs, return_timestamps=False, generate_kwargs=None):
    """Run generation for one batch of preprocessed audio inputs.

    Models other than ``seq2seq_whisper`` are delegated to the parent
    pipeline's ``_forward``. For Whisper, the batch may contain padded
    entries (marked by ``is_last is None`` — presumably added upstream to
    fill a fixed batch size; confirm against the preprocessing step); the
    rows corresponding to padding are stripped from the returned dict.

    Args:
        model_inputs: Dict produced by preprocessing. Must contain
            ``is_last`` and either ``input_features`` or ``input_values``;
            may also carry ``attention_mask`` and ``stride``. Consumed keys
            are popped; any leftovers are passed through in the result.
        return_timestamps: When truthy, forwarded to ``generate`` via
            ``generate_kwargs["return_timestamps"]``.
        generate_kwargs: Extra keyword arguments for ``self.model.generate``.

    Returns:
        Dict with ``is_last``, ``tokens``, optionally ``stride``, plus any
        remaining ``model_inputs`` entries, truncated to the unpadded rows.

    Raises:
        ValueError: If neither ``input_features`` nor ``input_values``
            is present in ``model_inputs``.
    """
    if self.type != "seq2seq_whisper":
        return super()._forward(model_inputs, return_timestamps=return_timestamps, generate_kwargs=generate_kwargs)

    if generate_kwargs is None:
        generate_kwargs = {}
    if return_timestamps:
        generate_kwargs["return_timestamps"] = return_timestamps

    is_last = model_inputs.pop("is_last")
    if "input_features" in model_inputs:
        inputs = model_inputs.pop("input_features")
    elif "input_values" in model_inputs:
        inputs = model_inputs.pop("input_values")
    else:
        raise ValueError(
            "Seq2Seq speech recognition model requires either a "
            f"`input_features` or `input_values` key, but only has {model_inputs.keys()}"
        )
    attention_mask = model_inputs.pop("attention_mask", None)

    tokens = self.model.generate(inputs=inputs, attention_mask=attention_mask, **generate_kwargs)
    out = {"tokens": tokens}
    stride = model_inputs.pop("stride", None)
    if stride is not None:
        out["stride"] = stride

    # Whatever remains in model_inputs is forwarded unchanged.
    extra = model_inputs
    maybe_padded_ret = {"is_last": is_last, **out, **extra}

    # Remove inputs and outputs associated with padded inputs. Padded rows
    # are marked with `is_last is None`; everything from the first such row
    # onward is padding.
    if not isinstance(is_last, list):
        is_last = [is_last]
    # Index of the first padded row; defaults past the end when none is padded.
    first_padding_idx = next(
        (idx for idx, last in enumerate(is_last) if last is None), tokens.shape[0]
    )
    if first_padding_idx == tokens.shape[0]:
        # No padding — return everything as-is.
        return maybe_padded_ret

    padded_keys = ["is_last", "tokens"]
    if stride is not None:
        padded_keys.append("stride")
    for padded_key in padded_keys:
        maybe_padded_ret[padded_key] = maybe_padded_ret[padded_key][:first_padding_idx]
    return maybe_padded_ret