in training/flax/distil_whisper/pipeline.py [0:0]
def postprocess(self, model_outputs, return_timestamps=None, return_language=None):
# unpack the outputs from list(dict(list)) to list(dict)
model_outputs = [dict(zip(output, t)) for output in model_outputs for t in zip(*output.values())]
time_precision = self.feature_extractor.chunk_length / self.model.config.max_source_positions
# Send the chunking back to seconds, it's easier to handle in whisper
sampling_rate = self.feature_extractor.sampling_rate
for output in model_outputs:
if "stride" in output:
chunk_len, stride_left, stride_right = output["stride"]
# Go back in seconds
chunk_len /= sampling_rate
stride_left /= sampling_rate
stride_right /= sampling_rate
output["stride"] = chunk_len, stride_left, stride_right
text, optional = self.tokenizer._decode_asr(
model_outputs,
return_timestamps=return_timestamps,
return_language=return_language,
time_precision=time_precision,
)
return {"text": text, **optional}