qwiklabs/colab-enterprise/gen-ai-demo/Audio-Generation-Pipeline.ipynb (358 lines of code) (raw):

{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "9C69oK48tReV", "metadata": { "id": "9C69oK48tReV" }, "outputs": [], "source": [ "##################################################################################\n", "# Copyright 2024 Google LLC\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", "# You may obtain a copy of the License at\n", "#\n", "# https://www.apache.org/licenses/LICENSE-2.0\n", "#\n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License.\n", "###################################################################################" ] }, { "cell_type": "markdown", "id": "p7po1nqYtTIW", "metadata": { "id": "p7po1nqYtTIW" }, "source": [ "# Data beans audio reviews generation\n", "This notebook generates synthetic audio reviews based on the data beans table `data_beans.customer_review`\n", "- It uses the Google `text-to-speech` service" ] }, { "cell_type": "code", "execution_count": null, "id": "yzDTIVadV2bx", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "executionInfo": { "elapsed": 19980, "status": "ok", "timestamp": 1707926381631, "user": { "displayName": "", "userId": "" }, "user_tz": -180 }, "id": "yzDTIVadV2bx", "outputId": "76e05ecd-92b0-474d-aac3-b09d31ad811d" }, "outputs": [], "source": [ "! pip install google-cloud-texttospeech\n", "! 
# --- Cell: kernel restart -------------------------------------------------
# Restart kernel after installs so that your environment can access the new packages
import IPython

app = IPython.Application.instance()
# Passing True asks for a restart rather than a plain shutdown (see the comment
# above). Everything below has to be run once the kernel is back up.
app.kernel.do_shutdown(True)

# --- Cell: imports --------------------------------------------------------
import random
import logging
import os
import IPython.display


from tqdm import tqdm
from google.cloud import texttospeech
from google.cloud import bigquery

# --- Cell: parameters -----------------------------------------------------
# NOTE(review): the "${...}" values look like template placeholders that are
# substituted before the notebook is delivered — confirm with the deployment
# tooling. Until they are replaced, the BigQuery table reference built in
# _get_reviews() is not a valid identifier.
PROJECT_ID = "${project_id}"
DATASET_ID = "${bigquery_data_beans_curated_dataset}"
# Not referenced by any other cell visible in this notebook.
LOCATION = "us-central1"
# Table (inside PROJECT_ID.DATASET_ID) that _get_reviews() reads from.
TABLE_ID = "customer_review"
# Local directory where the generated "audio_<id>.mp3" files are written and
# later read back for playback.
AUDIO_GEN_DIR = "beans_audios"
# Voice names that _synth_audio() picks from at random; all of them are en-GB,
# matching the language_code hard-coded in _synth_audio().
VOICES = [ 'en-GB-Wavenet-A','en-GB-Wavenet-B','en-GB-Wavenet-C','en-GB-Wavenet-D']
"displayName": "", "userId": "" }, "user_tz": -180 }, "id": "T9NNmUlhu5_D" }, "outputs": [], "source": [ "logging.getLogger().setLevel(logging.INFO)\n", "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = PROJECT_ID\n", "os.environ[\"GOOGLE_CLOUD_QUOTA_PROJECT\"] = PROJECT_ID" ] }, { "cell_type": "markdown", "id": "T7yHmZMnuWEg", "metadata": { "id": "T7yHmZMnuWEg" }, "source": [ "## Imports and parameters" ] }, { "cell_type": "code", "execution_count": null, "id": "QXuNh6SXuYBy", "metadata": { "executionInfo": { "elapsed": 416, "status": "ok", "timestamp": 1707926567523, "user": { "displayName": "", "userId": "" }, "user_tz": -180 }, "id": "QXuNh6SXuYBy" }, "outputs": [], "source": [ "def _get_reviews():\n", " client = bigquery.Client()\n", " sql = f\"\"\"\n", " SELECT *\n", " FROM `{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}`\n", " WHERE review_audio_filename is null \n", " ORDER BY customer_review_id\n", " \"\"\"\n", " return client.query(sql).to_dataframe()\n", "\n", "def _get_processed_ids(directory):\n", " ids = []\n", " for file in os.listdir(directory):\n", " if file.endswith(\".mp3\"):\n", " id = int(file.split(\"_\")[1].split(\".\")[0])\n", " ids.append(id)\n", " return ids\n", "\n", "def _synth_audio(review):\n", " tts_client = texttospeech.TextToSpeechClient()\n", " synthesis_input = texttospeech.SynthesisInput(text=review)\n", " voice_name = random.choice(VOICES)\n", " voice = texttospeech.VoiceSelectionParams(language_code=\"en-GB\", name=voice_name)\n", " audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3)\n", " response = tts_client.synthesize_speech(input=synthesis_input, voice=voice, audio_config=audio_config)\n", " return response.audio_content\n", "\n", "def _save_audio(audio,id):\n", " with open(os.path.join(AUDIO_GEN_DIR,f\"audio_{id}.mp3\"), \"wb\") as f:\n", " f.write(audio)" ] }, { "cell_type": "markdown", "id": "djeVvQ1-Xc_i", "metadata": { "id": "djeVvQ1-Xc_i" }, "source": [ "## Audio generation" ] }, { "cell_type": "code", 
"execution_count": null, "id": "744kcPE-Q4gs", "metadata": { "executionInfo": { "elapsed": 444, "status": "ok", "timestamp": 1707926639628, "user": { "displayName": "", "userId": "" }, "user_tz": -180 }, "id": "744kcPE-Q4gs" }, "outputs": [], "source": [ "def gen_audio_reviews():\n", " os.makedirs(AUDIO_GEN_DIR, exist_ok=True)\n", " reviews_pd = _get_reviews()\n", " for _, row in tqdm(reviews_pd.iterrows(), total=len(reviews_pd), desc=\"Generating Audios\"):\n", " review = row['review_text']\n", " customer_review_id = row['customer_review_id']\n", " audio = _synth_audio(review)\n", " _save_audio(audio,customer_review_id)" ] }, { "cell_type": "code", "execution_count": null, "id": "bMAerK-gW5rc", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 790 }, "executionInfo": { "elapsed": 7966, "status": "error", "timestamp": 1707926647593, "user": { "displayName": "", "userId": "" }, "user_tz": -180 }, "id": "bMAerK-gW5rc", "outputId": "f0ea7c4b-fd22-4fe7-a3d2-300dc6f0a717" }, "outputs": [], "source": [ "gen_audio_reviews()" ] }, { "cell_type": "code", "execution_count": null, "id": "96407bb8", "metadata": {}, "outputs": [], "source": [ "from IPython.display import Audio \n", "from IPython.core.display import display\n", "processed_ids = _get_processed_ids(AUDIO_GEN_DIR)\n", "\n", "for item in processed_ids:\n", " filename = os.path.join(AUDIO_GEN_DIR,f\"audio_{item}.mp3\")\n", " display(Audio(filename, autoplay=True,rate=16000))" ] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 5 }