func()

in api/simultaneousinterpreter/translate_speech.go [135:168]


// speechToText transcribes the given audio and returns the transcript of the
// top alternative of the first recognition result.
//
// audio is handed to hd.Base64ToWave and the bytes it returns are sent as the
// recognition content; lang is passed through unchanged as the request's
// LanguageCode. The request declares the audio as LINEAR16 at 48000 Hz.
//
// Errors:
//   - a Base64ToWave failure is wrapped with hd.Wrapf;
//   - a Speech.Recognize failure is reported via hd.Errorf with
//     http.StatusInternalServerError;
//   - a response carrying no usable transcript (no results, or a first result
//     without alternatives) is reported via hd.Errorf with
//     http.StatusBadRequest.
func (h *handler) speechToText(ctx context.Context, lang, audio string) (string, error) {
	wave, err := hd.Base64ToWave(ctx, audio)
	if err != nil {
		return "", hd.Wrapf("failed Base64ToWave: %w", err)
	}

	req := &speechpb.RecognizeRequest{
		Config: &speechpb.RecognitionConfig{
			Encoding: speechpb.RecognitionConfig_LINEAR16,
			// NOTE(review): the sample rate is hard-coded; presumably
			// Base64ToWave always yields 48 kHz audio — confirm against it.
			SampleRateHertz: 48000,
			LanguageCode:    lang,
		},
		Audio: &speechpb.RecognitionAudio{
			AudioSource: &speechpb.RecognitionAudio_Content{Content: wave},
		},
	}

	res, err := h.Speech.Recognize(ctx, req)
	if err != nil {
		return "", hd.Errorf(ctx,
			http.StatusInternalServerError,
			http.StatusText(http.StatusInternalServerError),
			"failed Speech.Recognize: %w", err)
	}

	// Guard both slices before indexing: a result with an empty Alternatives
	// list would otherwise panic with an index-out-of-range error instead of
	// producing a client-facing "nothing recognized" response.
	if len(res.Results) == 0 || len(res.Results[0].Alternatives) == 0 {
		return "", hd.Errorf(ctx,
			http.StatusBadRequest,
			"no text was recognized",
			"no text was recognized")
	}

	// Only the first result's top alternative is returned; any further
	// results in the response are ignored.
	return res.Results[0].Alternatives[0].Transcript, nil
}