in api/simultaneousinterpreter/translate_speech.go [135:168]
// speechToText sends audio to the handler's Speech client for recognition in
// the given language and returns the transcript of the first alternative of
// the first recognition result.
//
// audio is converted with hd.Base64ToWave before being sent (presumably a
// base64-encoded WAV payload; confirm against that helper). The request is
// fixed to LINEAR16 encoding at 48000 Hz, so the audio must match that format.
//
// Errors:
//   - the wrapped hd.Base64ToWave error when the conversion fails;
//   - an hd.Errorf built with http.StatusInternalServerError when the
//     Recognize call fails;
//   - an hd.Errorf built with http.StatusBadRequest when the response
//     carries no usable transcript.
func (h *handler) speechToText(ctx context.Context, lang, audio string) (string, error) {
	wave, err := hd.Base64ToWave(ctx, audio)
	if err != nil {
		return "", hd.Wrapf("failed Base64ToWave: %w", err)
	}

	req := &speechpb.RecognizeRequest{
		Config: &speechpb.RecognitionConfig{
			Encoding:        speechpb.RecognitionConfig_LINEAR16,
			SampleRateHertz: 48000,
			LanguageCode:    lang,
		},
		Audio: &speechpb.RecognitionAudio{
			AudioSource: &speechpb.RecognitionAudio_Content{Content: wave},
		},
	}

	res, err := h.Speech.Recognize(ctx, req)
	if err != nil {
		return "", hd.Errorf(ctx,
			http.StatusInternalServerError,
			http.StatusText(http.StatusInternalServerError),
			"failed Speech.Recognize: %w", err)
	}

	// A result may be present without any alternatives; indexing
	// Alternatives[0] blindly would panic, so every level is checked and a
	// missing transcript is reported the same way as an empty result list.
	if res == nil ||
		len(res.Results) == 0 ||
		res.Results[0] == nil ||
		len(res.Results[0].Alternatives) == 0 ||
		res.Results[0].Alternatives[0] == nil {
		return "", hd.Errorf(ctx,
			http.StatusBadRequest,
			"no text was recognized",
			"no text was recognized")
	}

	return res.Results[0].Alternatives[0].Transcript, nil
}