in docker_images/k2/app/common.py [0:0]
def decode_waves(self, waves: List[torch.Tensor]) -> List[str]:
    """Decode a batch of waveforms into one transcript string per utterance.

    Args:
      waves:
        A list of 1-D torch.float32 tensors containing audio samples.
        waves[i] contains audio samples for the i-th utterance.

        Note:
          Whether it should be in the range [-32768, 32767] or be
          normalized to [-1, 1] depends on which range you used for your
          training data. For instance, if your training data used
          [-32768, 32767], then the given waves have to contain samples
          in this range.

          All models trained in icefall use the normalized range [-1, 1].

    Returns:
      A list of decoded results. `ans[i]` contains the decoded result for
      `waves[i]`. An empty `waves` yields an empty list without invoking
      the feature extractor or the model.
    """
    # Move every utterance onto the device this instance is configured for.
    waves = [w.to(self.device) for w in waves]
    if not waves:
        # Nothing to decode: avoid handing an empty batch to the feature
        # extractor / model, which are not shown to support it.
        return []

    features = self.feature_extractor(waves)
    tokens = self.nn_and_decoding_func(self.model, features)

    if hasattr(self, "sp"):
        # When an `sp` attribute exists, its decode() output is returned
        # as-is (presumably a SentencePiece processor -- TODO confirm).
        return self.sp.decode(tokens)

    # No `sp`: map each token id through the token table and concatenate
    # the symbols of each hypothesis into a single string.
    return ["".join(self.token_table[i] for i in hyp) for hyp in tokens]