api/contactcenteranalysis/analyze_speech.go (135 lines of code) (raw):
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package contactcenteranalysis
import (
"context"
"fmt"
"net/http"
"github.com/rs/zerolog/log"
languagepb "google.golang.org/genproto/googleapis/cloud/language/v1"
speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
hd "github.com/GoogleCloudPlatform/appengine-cloud-demo-portal/api/pkg/handler"
)
type analyzeSpeechRequest struct {
Audio struct {
Content string `json:"content"`
} `json:"audio"`
Config struct {
LanguageCode string `json:"language_code"`
} `json:"config"`
}
type document struct {
Content string `json:"content"`
Language string `json:"language"`
}
type analyzeSpeechResponse struct {
Document *document `json:"document"`
Entities []*languagepb.Entity `json:"entities"`
DocumentSentiment *languagepb.Sentiment `json:"document_sentiment"`
Language string `json:"language"`
Categories []*languagepb.ClassificationCategory `json:"categories"`
}
func (h *handler) analyzeSpeechHandler(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
logger := log.Ctx(ctx)
req := &analyzeSpeechRequest{}
if err := hd.DecodeJSONBody(r, req); err != nil {
hd.RespondErrorJSON(w, r, err)
return
}
if req.Config.LanguageCode == "" {
hd.RespondErrorMessage(w, r,
http.StatusBadRequest,
"language_code must be specified")
return
}
wave, err := hd.Base64ToWave(ctx, req.Audio.Content)
if err != nil {
hd.RespondErrorJSON(w, r, err)
return
}
speechReq := &speechpb.RecognizeRequest{
Config: &speechpb.RecognitionConfig{
Encoding: speechpb.RecognitionConfig_LINEAR16,
SampleRateHertz: 48000,
LanguageCode: req.Config.LanguageCode,
EnableAutomaticPunctuation: true,
},
Audio: &speechpb.RecognitionAudio{
AudioSource: &speechpb.RecognitionAudio_Content{Content: wave},
},
}
speechRes, err := h.Speech.Recognize(ctx, speechReq)
if err != nil {
err := hd.Errorf(ctx,
http.StatusInternalServerError,
http.StatusText(http.StatusInternalServerError),
"failed to requeest to Speech.Recognize: %w", err)
hd.RespondErrorJSON(w, r, err)
return
}
if len(speechRes.Results) == 0 {
err := hd.Errorf(ctx,
http.StatusBadRequest,
"no text was recognized",
"no text was recognized")
hd.RespondErrorJSON(w, r, err)
return
}
text := speechRes.Results[0].Alternatives[0].Transcript
logger.Debug().Msgf("recognized text = '%s'", text)
doc, err := buildDocument(ctx, req.Config.LanguageCode, text)
if err != nil {
hd.RespondErrorJSON(w, r, err)
return
}
languageReq := &languagepb.AnnotateTextRequest{
Document: doc,
Features: languageSupportedFeatures[doc.Language],
EncodingType: languagepb.EncodingType_UTF8,
}
logger.Debug().Msgf("%+v", languageReq)
languageRes, err := h.Language.AnnotateText(ctx, languageReq)
if err != nil {
err := hd.Errorf(ctx,
http.StatusInternalServerError,
http.StatusText(http.StatusInternalServerError),
"failed to requeest to Language.AnnotateText: %w", err)
hd.RespondErrorJSON(w, r, err)
return
}
logger.Debug().Msgf("%+v", languageRes)
res := &analyzeSpeechResponse{
Document: &document{
Content: text,
Language: doc.Language,
},
Entities: languageRes.Entities,
DocumentSentiment: languageRes.DocumentSentiment,
Language: req.Config.LanguageCode,
Categories: languageRes.Categories,
}
hd.RespondJSON(w, r, http.StatusOK, res)
}
func buildDocument(
ctx context.Context,
lang string,
text string,
) (*languagepb.Document, error) {
var sl *supportedLanguage
for _, l := range supportedLanguages {
if l.Code == lang {
sl = l
break
}
}
if sl == nil {
return nil, hd.Errorf(ctx,
http.StatusBadRequest,
fmt.Sprintf("%s is not supported", lang),
"unsupported language code: %s", lang)
}
return &languagepb.Document{
Type: languagepb.Document_PLAIN_TEXT,
Source: &languagepb.Document_Content{Content: text},
Language: sl.languageCode,
}, nil
}