in SpeechRecognition/create_wav2vec2.py [0:0]
def forward(self, waveforms: Tensor) -> str:
"""Given a single channel speech data, return transcription.
Args:
waveforms (Tensor): Speech tensor. Shape `[1, num_frames]`.
Returns:
str: The resulting transcript
"""
logits, _ = self.model(waveforms) # [batch, num_seq, num_label]
best_path = torch.argmax(logits[0], dim=-1) # [num_seq,]
prev = ''
hypothesis = ''
for i in best_path:
char = self.labels[i]
if char == prev:
continue
if char == '<s>':
prev = ''
continue
hypothesis += char
prev = char
return hypothesis.replace('|', ' ')