machine_learning/cloud_ai_building_blocks/speech-to-speech/speech-to-speech.py

#!/usr/bin/python
#
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Speech-to-speech sample: Japanese audio in, English audio out.

The script chains three Google Cloud APIs through the discovery client:

1. Cloud Speech-to-Text transcribes ``ja-sample.flac`` (Japanese).
2. Cloud Translation translates the transcript from Japanese to English.
3. Cloud Text-to-Speech synthesizes the English text, which is written to
   ``en-sample.mp3``.

Set ``APIKEY`` to a valid API key before running.
"""

import base64

from googleapiclient import discovery

APIKEY = 'input-your-api-key-here'

# Load audio file as a base64 encoded text.
with open('ja-sample.flac', 'rb') as audio:
    content = base64.b64encode(audio.read()).decode()

# Use Cloud Speech-to-Text API to transcribe audio.
speech_service = discovery.build('speech', 'v1', developerKey=APIKEY)
_request_body = {
    'audio': {
        'content': content,  # Audio data (base64 text).
    },
    'config': {
        'encoding': 'FLAC',  # Audio codec of the input.
        'sampleRateHertz': 16000,  # Sampling rate of the input.
        'languageCode': 'ja-JP',  # The input speech is Japanese.
    },
}
response = speech_service.speech().recognize(body=_request_body).execute()

# A response without 'results' presumably means that no speech was
# recognized; stop with a readable message instead of an opaque KeyError.
results = response.get('results')
if not results:
    raise SystemExit('No speech was recognized in ja-sample.flac.')
source_text = results[0]['alternatives'][0]['transcript']

# Translate text from Japanese to English.
translate_service = discovery.build('translate', 'v2', developerKey=APIKEY)
response = translate_service.translations().list(
    q=source_text,
    source='ja',
    target='en',
    # The API defaults to HTML output, which entity-escapes characters such
    # as apostrophes ("I&#39;m"). The result is fed to Text-to-Speech as
    # plain text, so request plain text here.
    format='text',
).execute()
target_text = response['translations'][0]['translatedText']

# Synthesize English audio from text.
tts_service = discovery.build('texttospeech', 'v1beta1', developerKey=APIKEY)
_request_body = {
    'input': {
        'text': target_text,  # Text to be spoken.
    },
    'voice': {
        'languageCode': 'en-US',  # Language to speak.
        'name': 'en-US-Wavenet-D',  # Voice to speak with.
    },
    'audioConfig': {
        'audioEncoding': 'MP3',  # Output format of the audio data.
    },
}
response = tts_service.text().synthesize(body=_request_body).execute()

# Save audio file. The API returns the audio as base64 text.
with open('en-sample.mp3', 'wb') as audio_file:
    audio_file.write(base64.b64decode(response['audioContent']))

machine_learning/cloud_ai_building_blocks/speech-to-speech/speech-to-speech.py (37 lines of code) (raw):