qwiklabs/colab-enterprise/gen-ai-demo/Audio-Generation-Pipeline.ipynb (358 lines of code) (raw):
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "9C69oK48tReV",
"metadata": {
"id": "9C69oK48tReV"
},
"outputs": [],
"source": [
"##################################################################################\n",
"# Copyright 2024 Google LLC\n",
"#\n",
"# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"# you may not use this file except in compliance with the License.\n",
"# You may obtain a copy of the License at\n",
"#\n",
"# https://www.apache.org/licenses/LICENSE-2.0\n",
"#\n",
"# Unless required by applicable law or agreed to in writing, software\n",
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"# See the License for the specific language governing permissions and\n",
"# limitations under the License.\n",
"###################################################################################"
]
},
{
"cell_type": "markdown",
"id": "p7po1nqYtTIW",
"metadata": {
"id": "p7po1nqYtTIW"
},
"source": [
"# Data beans audio reviews generation\n",
"This notebook generates synthetic audio reviews based on the data beans table `data_beans.customer_review`\n",
"- It uses Google `text-to-speech` service"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "yzDTIVadV2bx",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"executionInfo": {
"elapsed": 19980,
"status": "ok",
"timestamp": 1707926381631,
"user": {
"displayName": "",
"userId": ""
},
"user_tz": -180
},
"id": "yzDTIVadV2bx",
"outputId": "76e05ecd-92b0-474d-aac3-b09d31ad811d"
},
"outputs": [],
"source": [
"# %pip installs into the environment of the running kernel, whereas a bare\n",
"# shell `pip` may belong to a different Python interpreter.\n",
"%pip install google-cloud-texttospeech\n",
"%pip install --upgrade --user google-cloud-aiplatform"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "J5bkaL-LV9lx",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 593,
"status": "ok",
"timestamp": 1707926382219,
"user": {
"displayName": "",
"userId": ""
},
"user_tz": -180
},
"id": "J5bkaL-LV9lx",
"outputId": "df1b680b-7e86-4086-cfff-bce7d6546c12"
},
"outputs": [],
"source": [
"# The kernel has to be restarted after the installs so that the environment\n",
"# can access the new packages.\n",
"import IPython\n",
"\n",
"IPython.Application.instance().kernel.do_shutdown(True)"
]
},
{
"cell_type": "markdown",
"id": "jxgjDFt3te12",
"metadata": {
"id": "jxgjDFt3te12"
},
"source": [
"## Imports and parameters"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "YTK9UlfDU20o",
"metadata": {
"executionInfo": {
"elapsed": 3,
"status": "ok",
"timestamp": 1707926560390,
"user": {
"displayName": "",
"userId": ""
},
"user_tz": -180
},
"id": "YTK9UlfDU20o"
},
"outputs": [],
"source": [
"import random\n",
"import logging\n",
"import os\n",
"import IPython.display\n",
"\n",
"\n",
"from tqdm import tqdm\n",
"from google.cloud import texttospeech\n",
"from google.cloud import bigquery"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d_d9ZW-Mt8yY",
"metadata": {
"executionInfo": {
"elapsed": 456,
"status": "ok",
"timestamp": 1707926563455,
"user": {
"displayName": "",
"userId": ""
},
"user_tz": -180
},
"id": "d_d9ZW-Mt8yY"
},
"outputs": [],
"source": [
"# BigQuery source of the reviews.\n",
"PROJECT_ID = \"${project_id}\"\n",
"DATASET_ID = \"${bigquery_data_beans_curated_dataset}\"\n",
"TABLE_ID = \"customer_review\"\n",
"\n",
"LOCATION = \"us-central1\"\n",
"\n",
"# Local directory that receives the generated MP3 files.\n",
"AUDIO_GEN_DIR = \"beans_audios\"\n",
"\n",
"# Voices to choose from at random when synthesizing a review.\n",
"VOICES = [\n",
"    \"en-GB-Wavenet-A\",\n",
"    \"en-GB-Wavenet-B\",\n",
"    \"en-GB-Wavenet-C\",\n",
"    \"en-GB-Wavenet-D\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "T9NNmUlhu5_D",
"metadata": {
"executionInfo": {
"elapsed": 3,
"status": "ok",
"timestamp": 1707926564339,
"user": {
"displayName": "",
"userId": ""
},
"user_tz": -180
},
"id": "T9NNmUlhu5_D"
},
"outputs": [],
"source": [
"# Let INFO-level messages through the root logger.\n",
"logging.root.setLevel(logging.INFO)\n",
"\n",
"# Export PROJECT_ID under both project-related environment variables.\n",
"os.environ.update(\n",
"    {\n",
"        \"GOOGLE_CLOUD_PROJECT\": PROJECT_ID,\n",
"        \"GOOGLE_CLOUD_QUOTA_PROJECT\": PROJECT_ID,\n",
"    }\n",
")"
]
},
{
"cell_type": "markdown",
"id": "T7yHmZMnuWEg",
"metadata": {
"id": "T7yHmZMnuWEg"
},
"source": [
"## Helper functions"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "QXuNh6SXuYBy",
"metadata": {
"executionInfo": {
"elapsed": 416,
"status": "ok",
"timestamp": 1707926567523,
"user": {
"displayName": "",
"userId": ""
},
"user_tz": -180
},
"id": "QXuNh6SXuYBy"
},
"outputs": [],
"source": [
"def _get_reviews():\n",
"    \"\"\"Query the reviews whose `review_audio_filename` is still null.\n",
"\n",
"    Returns:\n",
"        The query result converted to a DataFrame, ordered by\n",
"        `customer_review_id`.\n",
"    \"\"\"\n",
"    query = f\"\"\"\n",
" SELECT *\n",
" FROM `{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}`\n",
" WHERE review_audio_filename is null \n",
" ORDER BY customer_review_id\n",
" \"\"\"\n",
"    bq_client = bigquery.Client()\n",
"    query_job = bq_client.query(query)\n",
"    return query_job.to_dataframe()\n",
"\n",
"def _get_processed_ids(directory):\n",
"    \"\"\"Return the review ids that already have an audio file in `directory`.\n",
"\n",
"    Only entries named `audio_<integer>.mp3` are taken into account. Anything\n",
"    else in the folder (other extensions, unrelated `.mp3` files) is ignored\n",
"    instead of raising while the name is parsed.\n",
"\n",
"    Args:\n",
"        directory: Path of the folder to scan.\n",
"\n",
"    Returns:\n",
"        A list of integer ids, in the order produced by `os.listdir`.\n",
"    \"\"\"\n",
"    import re  # local import keeps this helper self-contained\n",
"\n",
"    audio_name = re.compile(r\"audio_(-?\\d+)\\.mp3\")\n",
"    ids = []\n",
"    for entry in os.listdir(directory):\n",
"        # fullmatch: the whole file name must follow the pattern.\n",
"        match = audio_name.fullmatch(entry)\n",
"        if match:\n",
"            ids.append(int(match.group(1)))\n",
"    return ids\n",
"\n",
"def _synth_audio(review):\n",
"    \"\"\"Synthesize `review` as MP3 audio using a randomly chosen voice.\n",
"\n",
"    The Text-to-Speech client is created on the first call and reused\n",
"    afterwards, instead of building a new client for every review.\n",
"\n",
"    Args:\n",
"        review: Text to be spoken.\n",
"\n",
"    Returns:\n",
"        The `audio_content` of the synthesis response.\n",
"    \"\"\"\n",
"    # Cache the client on the function object so repeated calls share it.\n",
"    tts_client = getattr(_synth_audio, \"_tts_client\", None)\n",
"    if tts_client is None:\n",
"        tts_client = texttospeech.TextToSpeechClient()\n",
"        _synth_audio._tts_client = tts_client\n",
"\n",
"    voice_name = random.choice(VOICES)\n",
"    response = tts_client.synthesize_speech(\n",
"        input=texttospeech.SynthesisInput(text=review),\n",
"        voice=texttospeech.VoiceSelectionParams(language_code=\"en-GB\", name=voice_name),\n",
"        audio_config=texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3),\n",
"    )\n",
"    return response.audio_content\n",
"\n",
"def _save_audio(audio, id):\n",
"    \"\"\"Write the `audio` bytes to `audio_<id>.mp3` inside AUDIO_GEN_DIR.\"\"\"\n",
"    target_path = os.path.join(AUDIO_GEN_DIR, f\"audio_{id}.mp3\")\n",
"    with open(target_path, \"wb\") as mp3_file:\n",
"        mp3_file.write(audio)"
]
},
{
"cell_type": "markdown",
"id": "djeVvQ1-Xc_i",
"metadata": {
"id": "djeVvQ1-Xc_i"
},
"source": [
"## Audio generation"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "744kcPE-Q4gs",
"metadata": {
"executionInfo": {
"elapsed": 444,
"status": "ok",
"timestamp": 1707926639628,
"user": {
"displayName": "",
"userId": ""
},
"user_tz": -180
},
"id": "744kcPE-Q4gs"
},
"outputs": [],
"source": [
"def gen_audio_reviews():\n",
"    \"\"\"Generate one MP3 per pending review and store it in AUDIO_GEN_DIR.\n",
"\n",
"    Reviews whose audio file is already present in AUDIO_GEN_DIR are skipped,\n",
"    so an interrupted run can be resumed without synthesizing the same text\n",
"    again. Rows without usable review text are skipped with a warning instead\n",
"    of aborting the whole batch.\n",
"    \"\"\"\n",
"    os.makedirs(AUDIO_GEN_DIR, exist_ok=True)\n",
"    reviews_pd = _get_reviews()\n",
"    already_done = set(_get_processed_ids(AUDIO_GEN_DIR))\n",
"    for _, row in tqdm(reviews_pd.iterrows(), total=len(reviews_pd), desc=\"Generating Audios\"):\n",
"        customer_review_id = row['customer_review_id']\n",
"        if customer_review_id in already_done:\n",
"            continue\n",
"        review = row['review_text']\n",
"        # Null or blank text cannot be synthesized; skip it and keep going.\n",
"        if not isinstance(review, str) or not review.strip():\n",
"            logging.warning(\"Skipping review %s: no review text\", customer_review_id)\n",
"            continue\n",
"        audio = _synth_audio(review)\n",
"        _save_audio(audio, customer_review_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bMAerK-gW5rc",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 790
},
"executionInfo": {
"elapsed": 7966,
"status": "error",
"timestamp": 1707926647593,
"user": {
"displayName": "",
"userId": ""
},
"user_tz": -180
},
"id": "bMAerK-gW5rc",
"outputId": "f0ea7c4b-fd22-4fe7-a3d2-300dc6f0a717"
},
"outputs": [],
"source": [
"gen_audio_reviews()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "96407bb8",
"metadata": {},
"outputs": [],
"source": [
"# `display` is imported from IPython.display; the IPython.core.display path\n",
"# is deprecated.\n",
"from IPython.display import Audio, display\n",
"\n",
"# Sorted so the players are listed in ascending review-id order.\n",
"processed_ids = sorted(_get_processed_ids(AUDIO_GEN_DIR))\n",
"\n",
"for item in processed_ids:\n",
"    filename = os.path.join(AUDIO_GEN_DIR, f\"audio_{item}.mp3\")\n",
"    # No autoplay: every clip starting at once would overlap. `rate` only\n",
"    # applies to raw sample arrays, so it is not passed for an MP3 file.\n",
"    display(Audio(filename=filename, autoplay=False))"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}