texttospeech/synthesize_text/synthesize_text.go (96 lines of code) (raw):
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The synthesize_text command converts plain text or SSML content to an audio file.
package main
import (
"context"
"flag"
"fmt"
"io"
"log"
"os"
texttospeech "cloud.google.com/go/texttospeech/apiv1"
"cloud.google.com/go/texttospeech/apiv1/texttospeechpb"
)
// [START tts_synthesize_text]
// SynthesizeText synthesizes plain text and saves the output to outputFile.
//
// The request asks for an en-US voice with a female SSML gender and MP3
// audio encoding. The audio bytes returned by the API are written to
// outputFile with mode 0644, and a confirmation line is printed to w.
//
// Any failure while creating the client, calling SynthesizeSpeech, or writing
// the file is returned wrapped with the name of the step that failed, so the
// underlying error remains reachable through errors.Is / errors.As.
func SynthesizeText(w io.Writer, text, outputFile string) error {
	ctx := context.Background()

	client, err := texttospeech.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("texttospeech.NewClient: %w", err)
	}
	defer client.Close()

	req := texttospeechpb.SynthesizeSpeechRequest{
		Input: &texttospeechpb.SynthesisInput{
			InputSource: &texttospeechpb.SynthesisInput_Text{Text: text},
		},
		// Note: the voice can also be specified by name.
		// Names of voices can be retrieved with client.ListVoices().
		Voice: &texttospeechpb.VoiceSelectionParams{
			LanguageCode: "en-US",
			SsmlGender:   texttospeechpb.SsmlVoiceGender_FEMALE,
		},
		AudioConfig: &texttospeechpb.AudioConfig{
			AudioEncoding: texttospeechpb.AudioEncoding_MP3,
		},
	}

	resp, err := client.SynthesizeSpeech(ctx, &req)
	if err != nil {
		return fmt.Errorf("SynthesizeSpeech: %w", err)
	}

	// The error from os.WriteFile already names the path, so only the step
	// is added as context here.
	if err := os.WriteFile(outputFile, resp.AudioContent, 0644); err != nil {
		return fmt.Errorf("writing audio content: %w", err)
	}

	fmt.Fprintf(w, "Audio content written to file: %v\n", outputFile)
	return nil
}
// [END tts_synthesize_text]
// [START tts_synthesize_ssml]
// SynthesizeSSML synthesizes ssml and saves the output to outputFile.
//
// ssml must be well-formed according to:
//
//	https://www.w3.org/TR/speech-synthesis/
//
// Example: <speak>Hello there.</speak>
//
// The request asks for an en-US voice with a female SSML gender and MP3
// audio encoding. The audio bytes returned by the API are written to
// outputFile with mode 0644, and a confirmation line is printed to w.
//
// Any failure while creating the client, calling SynthesizeSpeech, or writing
// the file is returned wrapped with the name of the step that failed, so the
// underlying error remains reachable through errors.Is / errors.As.
func SynthesizeSSML(w io.Writer, ssml, outputFile string) error {
	ctx := context.Background()

	client, err := texttospeech.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("texttospeech.NewClient: %w", err)
	}
	defer client.Close()

	req := texttospeechpb.SynthesizeSpeechRequest{
		Input: &texttospeechpb.SynthesisInput{
			InputSource: &texttospeechpb.SynthesisInput_Ssml{Ssml: ssml},
		},
		// Note: the voice can also be specified by name.
		// Names of voices can be retrieved with client.ListVoices().
		Voice: &texttospeechpb.VoiceSelectionParams{
			LanguageCode: "en-US",
			SsmlGender:   texttospeechpb.SsmlVoiceGender_FEMALE,
		},
		AudioConfig: &texttospeechpb.AudioConfig{
			AudioEncoding: texttospeechpb.AudioEncoding_MP3,
		},
	}

	resp, err := client.SynthesizeSpeech(ctx, &req)
	if err != nil {
		return fmt.Errorf("SynthesizeSpeech: %w", err)
	}

	// The error from os.WriteFile already names the path, so only the step
	// is added as context here.
	if err := os.WriteFile(outputFile, resp.AudioContent, 0644); err != nil {
		return fmt.Errorf("writing audio content: %w", err)
	}

	fmt.Fprintf(w, "Audio content written to file: %v\n", outputFile)
	return nil
}
// [END tts_synthesize_ssml]
// main parses the --text, --ssml and --output-file flags and synthesizes
// speech from whichever input was supplied, reporting progress on stdout.
//
// --text is checked first, so it takes precedence when both --text and --ssml
// are set. If neither is set, the program exits with a usage message.
func main() {
	var (
		textFlag = flag.String("text", "",
			"The text from which to synthesize speech.")
		ssmlFlag = flag.String("ssml", "",
			"The ssml string from which to synthesize speech.")
		outPath = flag.String("output-file", "output.txt",
			"The name of the output file.")
	)
	flag.Parse()

	switch {
	case *textFlag != "":
		if err := SynthesizeText(os.Stdout, *textFlag, *outPath); err != nil {
			log.Fatal(err)
		}
	case *ssmlFlag != "":
		if err := SynthesizeSSML(os.Stdout, *ssmlFlag, *outPath); err != nil {
			log.Fatal(err)
		}
	default:
		log.Fatal(`Error: please supply a --text or --ssml content.
Examples:
go run synthesize_text.go --text "hello"
go run synthesize_text.go --ssml "<speak>Hello there.</speak>"`)
	}
}