speech/captionasync/captionasync.go

// Copyright 2019 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Command captionasync sends audio data to the Google Speech API // and prints its transcript. package main import ( "context" "fmt" "io" "log" "os" "strings" speech "cloud.google.com/go/speech/apiv1" "cloud.google.com/go/speech/apiv1/speechpb" ) const usage = `Usage: captionasync <audiofile> Audio file must be a 16-bit signed little-endian encoded with a sample rate of 16000. The path to the audio file may be a GCS URI (gs://...).` func main() { if len(os.Args) < 2 { fmt.Fprintln(os.Stderr, usage) os.Exit(2) } var sendFunc func(io.Writer, *speech.Client, string) error path := os.Args[1] if strings.Contains(path, "://") { sendFunc = sendGCS } else { sendFunc = send } ctx := context.Background() client, err := speech.NewClient(ctx) if err != nil { log.Fatal(err) } defer client.Close() if err := sendFunc(os.Stdout, client, os.Args[1]); err != nil { log.Fatal(err) } } // [START speech_transcribe_async] func send(w io.Writer, client *speech.Client, filename string) error { ctx := context.Background() data, err := os.ReadFile(filename) if err != nil { return err } // Send the contents of the audio file with the encoding and // and sample rate information to be transcripted. req := &speechpb.LongRunningRecognizeRequest{ Config: &speechpb.RecognitionConfig{ Encoding: speechpb.RecognitionConfig_LINEAR16, SampleRateHertz: 16000, LanguageCode: "en-US", }, Audio: &speechpb.RecognitionAudio{ AudioSource: &speechpb.RecognitionAudio_Content{Content: data}, }, } op, err := client.LongRunningRecognize(ctx, req) if err != nil { return err } resp, err := op.Wait(ctx) if err != nil { return err } // Print the results. for _, result := range resp.Results { for _, alt := range result.Alternatives { fmt.Fprintf(w, "\"%v\" (confidence=%3f)\n", alt.Transcript, alt.Confidence) } } return nil } // [END speech_transcribe_async] // [START speech_transcribe_async_gcs] func sendGCS(w io.Writer, client *speech.Client, gcsURI string) error { ctx := context.Background() // Send the contents of the audio file with the encoding and // and sample rate information to be transcripted. req := &speechpb.LongRunningRecognizeRequest{ Config: &speechpb.RecognitionConfig{ Encoding: speechpb.RecognitionConfig_LINEAR16, SampleRateHertz: 16000, LanguageCode: "en-US", }, Audio: &speechpb.RecognitionAudio{ AudioSource: &speechpb.RecognitionAudio_Uri{Uri: gcsURI}, }, } op, err := client.LongRunningRecognize(ctx, req) if err != nil { return err } resp, err := op.Wait(ctx) if err != nil { return err } // Print the results. for _, result := range resp.Results { for _, alt := range result.Alternatives { fmt.Fprintf(w, "\"%v\" (confidence=%3f)\n", alt.Transcript, alt.Confidence) } } return nil } // [END speech_transcribe_async_gcs]

speech/captionasync/captionasync.go (95 lines of code) (raw):