courses/understanding_spanner/colab-notebooks/Spanner_Interleaved.ipynb (562 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "QKBCLNZVAv_j" }, "source": [ "## Install the Spanner Python API" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "7uhDuh2B3-CX" }, "outputs": [], "source": [ "! pip install --upgrade google-cloud-spanner" ] }, { "cell_type": "markdown", "metadata": { "id": "f-wsCeqBAyWl" }, "source": [ "## Set the following variables \n", "\n", "You at least need to change the Project ID variable to your project's ID. You can change the other variables if you like. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "lJIrqAZz4CzU" }, "outputs": [], "source": [ "project_id = 'roi-spanner'\n", "instance_id = 'spanner-interleaved'\n", "processing_units = 100\n", "database_id = 'pets-interleaved'\n", "\n", "OPERATION_TIMEOUT_SECONDS = 240\n", "\n", "!gcloud services enable spanner.googleapis.com \n", "print(\"Spanner Enabled\")\n", "\n", "print(\"Done\")" ] }, { "cell_type": "markdown", "metadata": { "id": "iziApd7nA5F1" }, "source": [ "## Create a Spanner instance" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "fUJGt4or4DgG" }, "outputs": [], "source": [ "import time\n", "from google.cloud import spanner\n", "\n", "def create_instance_with_processing_units(instance_id, processing_units):\n", " \"\"\"Creates an instance.\"\"\"\n", " spanner_client = spanner.Client(project=project_id)\n", "\n", " config_name = \"{}/instanceConfigs/regional-us-central1\".format(\n", " spanner_client.project_name\n", " )\n", "\n", " instance = spanner_client.instance(\n", " instance_id,\n", " configuration_name=config_name,\n", " display_name=\"spanner-instance-interleaved\",\n", " processing_units=processing_units,\n", " )\n", "\n", " # create() returns a long-running operation\n", " operation = instance.create()\n", "\n", " print(\"Waiting for operation to complete...\")\n", " operation.result(OPERATION_TIMEOUT_SECONDS)\n", "\n", " print(\n", " \"Created instance {} with {} processing units\".format(\n", " instance_id, instance.processing_units\n", " )\n", " )\n", "\n", "# Call the function\n", "create_instance_with_processing_units(instance_id, processing_units)" ] }, { "cell_type": "markdown", "metadata": { "id": "Ghw0o_qxBFIe" }, "source": [ "## Create the Pets database" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "hpgkaDz24Osx" }, "outputs": [], "source": [ "def create_database(instance_id, database_id):\n", " \"\"\"Creates a database and tables for sample data.\"\"\"\n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", "\n", " database = instance.database(\n", " database_id,\n", " )\n", "\n", " # create() returns a long-running operation\n", " operation = database.create()\n", "\n", " print(\"Waiting for operation to complete...\")\n", " operation.result(OPERATION_TIMEOUT_SECONDS)\n", "\n", " print(\"Created database {} on instance {}\".format(database_id, instance_id))\n", "\n", "# Call the function\n", "create_database(instance_id, database_id)\n" ] }, { "cell_type": "markdown", "metadata": { "id": "VRPYRaVaBHwp" }, "source": [ "## The following function runs DDL statements to build the database schema" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "6JY0xghs6SaR" }, "outputs": [], "source": [ "def run_ddl_statement(instance_id, database_id, ddl):\n", " \n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " operation = database.update_ddl(\n", " [ddl]\n", " )\n", "\n", " print(\"Waiting for operation to complete...\")\n", " operation.result(OPERATION_TIMEOUT_SECONDS)\n", "\n", " print(\"Ran statement: {}\".format(ddl))\n" ] }, { "cell_type": "markdown", "metadata": { "id": "IDA3A4VYBSYp" }, "source": [ "## Create the Owners table" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "FIf7iqy16_2r" }, "outputs": [], "source": [ "ddl = \"\"\"CREATE TABLE Owners (\n", " OwnerID STRING(36) NOT NULL,\n", " OwnerName STRING(MAX) NOT NULL\n", " ) PRIMARY KEY (OwnerID)\"\"\"\n", "\n", "\n", "\n", "run_ddl_statement(instance_id, database_id, ddl)" ] }, { "cell_type": "markdown", "metadata": { "id": "5jWaHTkBBW2z" }, "source": [ "## Create the Pets table\n", "\n", "Note: the pets table is interleaved with the Owners table. This means each pet is stored with its owner. The primary key of the Pets table uses both OwnerID and PetID. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "afnOhO3v8LMQ" }, "outputs": [], "source": [ "ddl = \"\"\"CREATE TABLE Pets (\n", " OwnerID STRING(36) NOT NULL, \n", " PetID STRING(MAX) NOT NULL, \n", " PetType STRING(MAX) NOT NULL,\n", " PetName STRING(MAX) NOT NULL,\n", " Breed STRING(MAX) NOT NULL,\n", " ) PRIMARY KEY (OwnerID,PetID),\n", " INTERLEAVE IN PARENT Owners ON DELETE CASCADE\"\"\"\n", "\n", "\n", "run_ddl_statement(instance_id, database_id, ddl)" ] }, { "cell_type": "markdown", "metadata": { "id": "hWEAc578B1Ij" }, "source": [ "## Display the database schema" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "1J_oDtTK9KVZ" }, "outputs": [], "source": [ "def get_database_ddl(instance_id, database_id):\n", " \"\"\"Gets the database DDL statements.\"\"\"\n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", " ddl = spanner_client.database_admin_api.get_database_ddl(database=database.name)\n", "\n", " print(\"Retrieved database DDL for {}\".format(database_id))\n", "\n", " for statement in ddl.statements:\n", " print(statement)\n", "\n", "\n", "get_database_ddl(instance_id, database_id)" ] }, { "cell_type": "markdown", "metadata": { "id": "GZRGAvhjB8ET" }, "source": [ "## Add some test records" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "LO-z1x629VKF" }, "outputs": [], "source": [ "import uuid\n", "\n", "def insert_data(instance_id, database_id):\n", " doug_id = str(uuid.uuid4())\n", " john_id = str(uuid.uuid4())\n", " sue_id = str(uuid.uuid4())\n", "\n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " with database.batch() as batch:\n", " batch.insert(\n", " table=\"Owners\",\n", " columns=(\"OwnerID\", \"OwnerName\"),\n", " values=[\n", " (doug_id, u\"Doug\"),\n", " (john_id, u\"John\"),\n", " (sue_id, u\"Sue\"),\n", " ],\n", " )\n", "\n", " batch.insert(\n", " table=\"Pets\",\n", " columns=(\"PetID\", \"OwnerID\", \"PetType\", \"PetName\", \"Breed\"),\n", " values=[\n", " (str(uuid.uuid4()), doug_id, u\"Dog\", u\"Noir\", u\"Schnoodle\"),\n", " (str(uuid.uuid4()), doug_id, u\"Dog\", u\"Bree\", u\"Mutt\"),\n", " (str(uuid.uuid4()), doug_id, u\"Cat\", u\"Tom\", u\"Alley\"),\n", " (str(uuid.uuid4()), john_id, u\"Dog\", u\"Duke\", u\"GoldenDoodle\"),\\\n", " (str(uuid.uuid4()), john_id, u\"Dog\", u\"Sparky\", u\"Poodle\"),\n", " (str(uuid.uuid4()), john_id, u\"Turtle\", u\"Cuff\", u\"Box\"),\n", " (str(uuid.uuid4()), john_id, u\"Turtle\", u\"Link\", u\"Box\"),\n", " (str(uuid.uuid4()), sue_id, u\"Cat\", u\"Cleo\", u\"Domestic\"),\n", " ],\n", " )\n", "\n", " print(\"Inserted data.\")\n", "\n", "\n", "insert_data(instance_id, database_id)" ] }, { "cell_type": "markdown", "metadata": { "id": "68K_pvCuCUUq" }, "source": [ "## Runs the query passed as an argument" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "cbNHZ51G-GAq" }, "outputs": [], "source": [ "def run_query(sql):\n", " # Instantiate a client.\n", " spanner_client = spanner.Client(project=project_id)\n", "\n", " # Get a Cloud Spanner instance by ID.\n", " instance = spanner_client.instance(instance_id)\n", "\n", " # Get a Cloud Spanner database by ID.\n", " database = instance.database(database_id)\n", "\n", " # Execute a simple SQL statement.\n", " with database.snapshot() as snapshot:\n", " results = snapshot.execute_sql(sql)\n", " for row in results:\n", " print(row)\n" ] }, { "cell_type": "markdown", "metadata": { "id": "k3QOelQ6CeKG" }, "source": [ "## Query with a join" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Ofdj6ok2-OEj" }, "outputs": [], "source": [ "sql = \"\"\"SELECT Owners.OwnerID, OwnerName, PetName, PetType, Breed \n", " FROM Owners \n", " JOIN Pets ON Owners.OwnerID = Pets.OwnerID;\"\"\"\n", "\n", "run_query(sql)" ] }, { "cell_type": "markdown", "metadata": { "id": "T8cj0PiaCi1l" }, "source": [ "Query with Nested Array" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "O91WCP_l-XAb" }, "outputs": [], "source": [ "sql = \"\"\"SELECT OwnerName,\n", " ARRAY(SELECT AS STRUCT PetName, PetType, Breed FROM Pets WHERE OwnerID = '54b4a7cd-e8cd-4ce6-9775-ac149460fc96') as Pets,\n", "FROM Owners WHERE OwnerID = '54b4a7cd-e8cd-4ce6-9775-ac149460fc96';\n", "\"\"\"\n", "\n", "run_query(sql)" ] }, { "cell_type": "markdown", "metadata": { "id": "rY-bbqHlgWuS" }, "source": [ "## Query with aggregated array" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "8VsstJPbbOnl" }, "outputs": [], "source": [ "sql = \"\"\"SELECT OwnerName, \n", " Array_Agg(STRUCT(PetName, PetType, Breed)) as Pets\n", " FROM Owners \n", " JOIN Pets ON Owners.OwnerID = Pets.OwnerID\n", " GROUP BY OwnerName;\"\"\"\n", "\n", "run_query(sql)" ] }, { "cell_type": "markdown", "metadata": { "id": "-mB52ZWOC3rL" }, "source": [ "## Delete all of the data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "cwpex85KC6N7" }, "outputs": [], "source": [ "def delete_data_with_dml(instance_id, database_id):\n", " \n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " def delete_owners(transaction):\n", " \n", " row_ct = transaction.execute_update(\n", " \"DELETE FROM Owners WHERE true = true\"\n", " )\n", "\n", " print(\"{} record(s) deleted.\".format(row_ct))\n", "\n", " database.run_in_transaction(delete_owners)\n", "\n", "# Call the function\n", "delete_data_with_dml(instance_id, database_id)\n" ] }, { "cell_type": "markdown", "metadata": { "id": "HfjCtt0CDmgm" }, "source": [ "## Delete the database" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "LZjVAHLoDszL" }, "outputs": [], "source": [ "def delete_database(instance_id, database_id):\n", " \n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", " \n", " database.drop()\n", "\n", " print(\"{} database dropped\".format(database_id))\n", "\n", " \n", "\n", "# Call the function\n", "delete_database(instance_id, database_id)" ] }, { "cell_type": "markdown", "metadata": { "id": "QawIviJTD1lb" }, "source": [ "## Delete the instance" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "-98p13_kD4-X" }, "outputs": [], "source": [ "def delete_spanner_instance(instance_id):\n", " \n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " instance.delete()\n", "\n", " print(\"{} instance deleted\".format(instance_id))\n", "\n", "# Call the function\n", "delete_spanner_instance(instance_id)" ] } ], "metadata": { "colab": { "authorship_tag": "ABX9TyMjxG1e7boM1iEDuSy72Urj", "collapsed_sections": [], "include_colab_link": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3.8.2 64-bit", "language": "python", "name": "python3" }, "language_info": { "name": "python", "version": "3.8.2" }, "vscode": { "interpreter": { "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" } } }, "nbformat": 4, "nbformat_minor": 0 }