speech/livecaption/livecaption.go (77 lines of code) (raw):

// Copyright 2019 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Command livecaption pipes the stdin audio data to // Google Speech API and outputs the transcript. // // As an example, gst-launch can be used to capture the mic input: // // $ gst-launch-1.0 -v pulsesrc ! audioconvert ! audioresample ! audio/x-raw,channels=1,rate=16000 ! filesink location=/dev/stdout | livecaption package main // [START speech_transcribe_streaming_mic] import ( "context" "fmt" "io" "log" "os" speech "cloud.google.com/go/speech/apiv1" "cloud.google.com/go/speech/apiv1/speechpb" ) func main() { ctx := context.Background() client, err := speech.NewClient(ctx) if err != nil { log.Fatal(err) } stream, err := client.StreamingRecognize(ctx) if err != nil { log.Fatal(err) } // Send the initial configuration message. if err := stream.Send(&speechpb.StreamingRecognizeRequest{ StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{ StreamingConfig: &speechpb.StreamingRecognitionConfig{ Config: &speechpb.RecognitionConfig{ Encoding: speechpb.RecognitionConfig_LINEAR16, SampleRateHertz: 16000, LanguageCode: "en-US", }, }, }, }); err != nil { log.Fatal(err) } go func() { // Pipe stdin to the API. buf := make([]byte, 1024) for { n, err := os.Stdin.Read(buf) if n > 0 { if err := stream.Send(&speechpb.StreamingRecognizeRequest{ StreamingRequest: &speechpb.StreamingRecognizeRequest_AudioContent{ AudioContent: buf[:n], }, }); err != nil { log.Printf("Could not send audio: %v", err) } } if err == io.EOF { // Nothing else to pipe, close the stream. if err := stream.CloseSend(); err != nil { log.Fatalf("Could not close stream: %v", err) } return } if err != nil { log.Printf("Could not read from stdin: %v", err) continue } } }() for { resp, err := stream.Recv() if err == io.EOF { break } if err != nil { log.Fatalf("Cannot stream results: %v", err) } if err := resp.Error; err != nil { // Workaround while the API doesn't give a more informative error. if err.Code == 3 || err.Code == 11 { log.Print("WARNING: Speech recognition request exceeded limit of 60 seconds.") } log.Fatalf("Could not recognize: %v", err) } for _, result := range resp.Results { fmt.Printf("Result: %+v\n", result) } } } // [END speech_transcribe_streaming_mic]