courses/understanding_spanner/colab-notebooks/Spanner_Queries_Transactions.ipynb (813 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "yir3_hhI6El2" }, "source": [ "## Install the Spanner Python API " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "zIAMqF3FV2qQ" }, "outputs": [], "source": [ "! pip install --upgrade google-cloud-spanner" ] }, { "cell_type": "markdown", "metadata": { "id": "FLsnZvxW6YN3" }, "source": [ "## Set the following variables \n", "\n", "You at least need to change the Project ID variable to your project's ID. You can change the other variables if you like. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Fvl1JzxrWkl3" }, "outputs": [], "source": [ "project_id = 'your-project-id-here'\n", "instance_id = 'my-spanner-instance'\n", "processing_units = 100\n", "database_id = 'pets-db'\n", "\n", "OPERATION_TIMEOUT_SECONDS = 240\n", "\n", "!gcloud services enable spanner.googleapis.com \n", "print(\"Spanner Enabled\")\n", "\n", "print(\"Done\")" ] }, { "cell_type": "markdown", "metadata": { "id": "-VjmkR1Y6VMX" }, "source": [ "## Create a Spanner instance, the Pets database, and add some test records " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "yqVnvI12XKAq" }, "outputs": [], "source": [ "import time\n", "from google.cloud import spanner\n", "import uuid\n", "\n", "def create_instance_with_processing_units(instance_id, processing_units):\n", " spanner_client = spanner.Client(project=project_id)\n", " config_name = \"{}/instanceConfigs/regional-us-central1\".format(\n", " spanner_client.project_name\n", " )\n", "\n", " instance = spanner_client.instance(\n", " instance_id,\n", " configuration_name=config_name,\n", " display_name=\"spanner-instance-python\",\n", " processing_units=processing_units,\n", " labels={\n", " \"created\": str(int(time.time())),\n", " },\n", " )\n", "\n", " operation = instance.create()\n", "\n", " print(\"Waiting for instance to be created...\")\n", " operation.result(OPERATION_TIMEOUT_SECONDS)\n", "\n", " print(\n", " \"Created instance {} with {} processing units\".format(\n", " instance_id, instance.processing_units\n", " )\n", " )\n", "\n", "def create_database(instance_id, database_id):\n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", "\n", " database = instance.database(\n", " database_id,\n", " ddl_statements=[\n", " \"\"\"CREATE TABLE Owners (\n", " OwnerID STRING(36) NOT NULL,\n", " OwnerName STRING(MAX) NOT NULL\n", " ) PRIMARY KEY (OwnerID)\"\"\",\n", " \"\"\"CREATE TABLE Pets (\n", " OwnerID STRING(36) NOT NULL, \n", " PetID STRING(MAX) NOT NULL, \n", " PetType STRING(MAX) NOT NULL,\n", " PetName STRING(MAX) NOT NULL,\n", " Breed STRING(MAX) NOT NULL,\n", " ) PRIMARY KEY (OwnerID,PetID),\n", " INTERLEAVE IN PARENT Owners ON DELETE CASCADE\"\"\",\n", " \"\"\"CREATE INDEX PetsByOwnerID ON Pets(OwnerID)\"\"\",\n", " \"\"\"CREATE INDEX PetsByType ON Pets(PetType)\"\"\",\n", " \"\"\"CREATE INDEX OwnersByName ON Owners(OwnerName)\"\"\",\n", " ],\n", " )\n", "\n", " operation = database.create()\n", "\n", " print(\"Waiting for database to be created...\")\n", " operation.result(OPERATION_TIMEOUT_SECONDS)\n", "\n", " print(\"Created database {} on instance {}\".format(database_id, instance_id))\n", "\n", "\n", "def insert_data(instance_id, database_id):\n", " doug_id = str(uuid.uuid4())\n", " john_id = str(uuid.uuid4())\n", " sue_id = str(uuid.uuid4())\n", "\n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " with database.batch() as batch:\n", " batch.insert(\n", " table=\"Owners\",\n", " columns=(\"OwnerID\", \"OwnerName\"),\n", " values=[\n", " (doug_id, u\"Doug\"),\n", " (john_id, u\"John\"),\n", " (sue_id, u\"Sue\"),\n", " ],\n", " )\n", "\n", " batch.insert(\n", " table=\"Pets\",\n", " columns=(\"PetID\", \"OwnerID\", \"PetType\", \"PetName\", \"Breed\"),\n", " values=[\n", " (str(uuid.uuid4()), doug_id, u\"Dog\", u\"Noir\", u\"Schnoodle\"),\n", " (str(uuid.uuid4()), doug_id, u\"Dog\", u\"Bree\", u\"Mutt\"),\n", " (str(uuid.uuid4()), doug_id, u\"Cat\", u\"Tom\", u\"Alley\"),\n", " (str(uuid.uuid4()), john_id, u\"Dog\", u\"Duke\", u\"GoldenDoodle\"),\\\n", " (str(uuid.uuid4()), john_id, u\"Dog\", u\"Sparky\", u\"Poodle\"),\n", " (str(uuid.uuid4()), john_id, u\"Turtle\", u\"Cuff\", u\"Box\"),\n", " (str(uuid.uuid4()), john_id, u\"Turtle\", u\"Link\", u\"Box\"),\n", " (str(uuid.uuid4()), sue_id, u\"Cat\", u\"Cleo\", u\"Domestic\"),\n", " ],\n", " )\n", "\n", " print(\"Inserted data.\")\n", "\n", "\n", "# Call the functions defined above\n", "create_instance_with_processing_units(instance_id, processing_units)\n", "create_database(instance_id, database_id)\n", "insert_data(instance_id, database_id)\n" ] }, { "cell_type": "markdown", "metadata": { "id": "GDM4oSmb7WiY" }, "source": [ "## Run a query using SQL" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "jM79QuodVlF2" }, "outputs": [], "source": [ "def run_simple_query(sql):\n", " # Instantiate a client.\n", " spanner_client = spanner.Client(project=project_id)\n", "\n", " # Get a Cloud Spanner instance by ID.\n", " instance = spanner_client.instance(instance_id)\n", "\n", " # Get a Cloud Spanner database by ID.\n", " database = instance.database(database_id)\n", "\n", " # Execute a simple SQL statement.\n", " with database.snapshot() as snapshot:\n", " results = snapshot.execute_sql(sql)\n", " for row in results:\n", " print(row)\n", "\n", "sql = \"\"\"SELECT OwnerName, PetName, PetType, Breed \n", " FROM Owners \n", " JOIN Pets ON Owners.OwnerID = Pets.OwnerID;\"\"\"\n", "\n", "\n", "run_simple_query(sql)" ] }, { "cell_type": "markdown", "metadata": { "id": "tIaIEp0k-luw" }, "source": [ "## Read data from a table" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "5rqQrEcL980A" }, "outputs": [], "source": [ "def read_data(instance_id, database_id):\n", " \"\"\"Reads sample data from the database.\"\"\"\n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " with database.snapshot() as snapshot:\n", " keyset = spanner.KeySet(all_=True)\n", " results = snapshot.read(\n", " table=\"Pets\", columns=(\"PetID\", \"PetName\", \"PetType\", \"Breed\"), keyset=keyset\n", " )\n", "\n", " for row in results:\n", " print(u\"PetID: {}, PetName: {}, PetType: {}, Breed: {}\".format(*row))\n", "\n", "\n", "# Call the function\n", "read_data(instance_id, database_id)" ] }, { "cell_type": "markdown", "metadata": { "id": "UNONRfo1-0_k" }, "source": [ "## Stale read\n", "\n", "Reads sample data from the database. The data is exactly 15 seconds stale." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "xX-l9U8I-vu1" }, "outputs": [], "source": [ "def read_stale_data(instance_id, database_id):\n", "\n", " import datetime\n", "\n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " # Everything is the same as the prior example except staleness\n", " staleness = datetime.timedelta(seconds=15)\n", " with database.snapshot(exact_staleness=staleness) as snapshot:\n", " keyset = spanner.KeySet(all_=True)\n", " results = snapshot.read(\n", " table=\"Pets\", columns=(\"PetID\", \"PetName\", \"PetType\", \"Breed\"), keyset=keyset,\n", " )\n", "\n", " for row in results:\n", " print(u\"PetID: {}, PetName: {}, PetType: {}, Breed: {}\".format(*row))\n", "\n", "# Call the function\n", "read_stale_data(instance_id, database_id)\n" ] }, { "cell_type": "markdown", "metadata": { "id": "rAk-dCtZ_9bB" }, "source": [ "## Run query using index with parameters\n", "\n", "Most queries require one or more parameters. See the code below." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "KkikPKzQ_7oQ" }, "outputs": [], "source": [ "def query_data_with_index(instance_id, database_id, owner_name=\"Doug\"):\n", "\n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " # Define the parameters and their data types \n", " params = {\"owner_name\": owner_name}\n", " param_types = {\n", " \"owner_name\": spanner.param_types.STRING,\n", " }\n", "\n", " # Note, the parameter added to the query: @owner_name\n", " # and the parameters added to the execuste_sql function\n", " with database.snapshot() as snapshot:\n", " results = snapshot.execute_sql(\n", " \"\"\"SELECT OwnerName, PetName, PetType, Breed \n", " FROM Owners \n", " JOIN Pets ON Owners.OwnerID = Pets.OwnerID \n", " WHERE OwnerName = @owner_name\"\"\"\n", " \"\",\n", " params=params,\n", " param_types=param_types,\n", " )\n", "\n", " for row in results:\n", " print(row)\n", "\n", "query_data_with_index(instance_id, database_id, \"John\")" ] }, { "cell_type": "markdown", "metadata": { "id": "coBd8Nu5F6uq" }, "source": [ "## Run query using index" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "KTaMUdYoFdel" }, "outputs": [], "source": [ "def get_pets_by_type(instance_id, database_id, pet_type=\"Dog\"):\n", "\n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " params = {\"pet_type\": pet_type}\n", " param_types = {\n", " \"pet_type\": spanner.param_types.STRING,\n", " }\n", " # You need to tell Spanner to use an index. \n", " # That is a little different than what you may be accustomed to. \n", " with database.snapshot() as snapshot:\n", " results = snapshot.execute_sql(\n", " \"\"\"SELECT *\n", " FROM Pets@{FORCE_INDEX=PetsByType} \n", " WHERE PetType = @pet_type\"\"\"\n", " \"\",\n", " params=params,\n", " param_types=param_types,\n", " )\n", "\n", " for row in results:\n", " print(row)\n", "\n", "get_pets_by_type(instance_id, database_id, pet_type=\"Dog\")" ] }, { "cell_type": "markdown", "metadata": { "id": "5dLgdXasGcTl" }, "source": [ "## Read data with an index" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "5vpqL0cCGX14" }, "outputs": [], "source": [ "def read_owners_with_index(instance_id, database_id):\n", " \n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " # Similar to the previous example, but using the read function. \n", " # Specify the index as a parameter to the runction. \n", " with database.snapshot() as snapshot:\n", " keyset = spanner.KeySet(all_=True)\n", " results = snapshot.read(\n", " table=\"Owners\", \n", " columns=(\"OwnerName\",), \n", " keyset=keyset,\n", " index=\"OwnersByName\",\n", " )\n", "\n", " for row in results:\n", " print(row)\n", "\n", "\n", "read_owners_with_index(instance_id, database_id)" ] }, { "cell_type": "markdown", "metadata": { "id": "ytSK-3PWTOrz" }, "source": [ "## Add an index\n", "\n", "You don't need to interrupt the database in order to make a schema change. In the example below, an index is added using a DDL statement. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "RqaDGQQ9TSmC" }, "outputs": [], "source": [ "def add_index(instance_id, database_id):\n", " \"\"\"Adds a simple index to the example database.\"\"\"\n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " # Add an index using a DDL SQL statement\n", " operation = database.update_ddl(\n", " [\"CREATE INDEX PetsByTypeBreedName ON Pets(PetType,Breed,PetName)\"]\n", " )\n", "\n", " print(\"Waiting for operation to complete...\")\n", " operation.result(OPERATION_TIMEOUT_SECONDS)\n", "\n", " print(\"Added the PetsByTypeBreedName index.\")\n", "\n", "# Call the function\n", "add_index(instance_id, database_id)" ] }, { "cell_type": "markdown", "metadata": { "id": "5Y51OmRVUGoO" }, "source": [ "## Read with index just created\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "_QTAQW7TUGy4" }, "outputs": [], "source": [ "def read_pets_with_index(instance_id, database_id):\n", " \n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " with database.snapshot() as snapshot:\n", " keyset = spanner.KeySet(all_=True)\n", " results = snapshot.read(\n", " table=\"Pets\", \n", " columns=(\"PetType\",\"Breed\",\"PetName\"), \n", " keyset=keyset,\n", " index=\"PetsByTypeBreedName\",\n", " )\n", "\n", " for row in results:\n", " print(row)\n", "\n", "# Call the function\n", "read_pets_with_index(instance_id, database_id)" ] }, { "cell_type": "markdown", "metadata": { "id": "C8slDHRSVygU" }, "source": [ "## Insert some records using a transaction\n", "\n", "The example below is simple, but demonstrates how to run a transaction. \n", "\n", "\n", "Notice the use of a closure in Python. That is, a function within a function. In the inner function, the INSERT statement is run using the transaction that is created in the outer function. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "BFOmTalRVyrx" }, "outputs": [], "source": [ "import uuid\n", "\n", "def insert_owner(instance_id, database_id, owner_id, owner_name):\n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " def insert_owners(transaction, owner_id, owner_name):\n", " params = {\"owner_id\": owner_id, \n", " \"owner_name\": owner_name,\n", " }\n", " param_types = {\n", " \"owner_id\": spanner.param_types.STRING,\n", " \"owner_name\": spanner.param_types.STRING,\n", " } \n", " \n", " # The transaction is passed as a argument. \n", " # Use the execute_update method when updating the data. \n", " row_ct = transaction.execute_update(\n", " \"\"\"INSERT Owners (OwnerID, OwnerName)\n", " VALUES (@owner_id, @owner_name)\"\"\",\n", " params=params,\n", " param_types=param_types,\n", " )\n", " print(\"{} record(s) inserted.\".format(row_ct))\n", "\n", " # Exectute the run_in_transaction function, passing a pointer\n", " # to the function that updates the data, along with the data required. \n", " database.run_in_transaction(insert_owners, owner_id, owner_name)\n", "\n", "\n", "\n", "# Call the function\n", "insert_owner(instance_id, database_id, str(uuid.uuid4()), \"Dave\" )\n" ] }, { "cell_type": "markdown", "metadata": { "id": "Wl5bXrwVYwa8" }, "source": [ "## Run update query with partitioned DML\n", "\n", "This is the second type of transaction that Spanner supports. It is designed for bulk changes to the data." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "WQutbU8N_goO" }, "outputs": [], "source": [ "def update_data_with_dml(instance_id, database_id):\n", " \n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " row_ct = database.execute_partitioned_dml(\n", " \"\"\"UPDATE Pets SET PetType = 'Canine'\n", " WHERE PetType = 'Dog'\"\"\")\n", " \n", " print(\"{} record(s) updated.\".format(row_ct))\n", "\n", "\n", "# Call the function\n", "update_data_with_dml(instance_id, database_id)" ] }, { "cell_type": "markdown", "metadata": { "id": "tiokdY7YZfnA" }, "source": [ "## Check changes" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "-k9FzD27Zfwt" }, "outputs": [], "source": [ "def run_query(sql):\n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " # Execute a simple SQL statement.\n", " with database.snapshot() as snapshot:\n", " results = snapshot.execute_sql(sql)\n", " for row in results:\n", " print(row)\n", "\n", "\n", "sql = \"\"\"SELECT OwnerName, PetName, PetType, Breed \n", " FROM Owners \n", " LEFT JOIN Pets ON Owners.OwnerID = Pets.OwnerID;\"\"\"\n", "\n", "run_query(sql)" ] }, { "cell_type": "markdown", "metadata": { "id": "12hVh5AMGHtT" }, "source": [ "## Run read-only transaction\n", "\n", "Read-only transactions are the third type of transaction supported by Spanner. Because both reads are using the same snapshot, they are guaranteed to be executed at the same timestamp. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "UOzuVOZkIXXj" }, "outputs": [], "source": [ "def read_only_transaction(instance_id, database_id):\n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " # Both reads use the same snapshot, so are executed at the same timestamp\n", " with database.snapshot(multi_use=True) as snapshot:\n", " # First Read.\n", " results = snapshot.execute_sql(\"SELECT OwnerName FROM Owners\")\n", "\n", " print(\"Results from first read:\")\n", " for row in results:\n", " print(row)\n", "\n", " #Second Read\n", " results = snapshot.execute_sql(\"SELECT PetName, PetType, Breed FROM Pets\")\n", "\n", " print(\"Results from second read:\")\n", " for row in results:\n", " print(row)\n", "\n", "\n", "read_only_transaction(instance_id, database_id)" ] }, { "cell_type": "markdown", "metadata": { "id": "_AVdvElrZ-aG" }, "source": [ "## Delete all of the data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "SImpKl-3aCD9" }, "outputs": [], "source": [ "def delete_data_with_dml(instance_id, database_id):\n", " \n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", "\n", " def delete_pets_owners(transaction):\n", " row_ct = transaction.execute_update(\n", " \"DELETE FROM Pets WHERE true = true\"\n", " )\n", " row_ct += transaction.execute_update(\n", " \"DELETE FROM Owners WHERE true = true\"\n", " )\n", "\n", " print(\"{} record(s) deleted.\".format(row_ct))\n", "\n", " database.run_in_transaction(delete_pets_owners)\n", "\n", "# Call the function\n", "delete_data_with_dml(instance_id, database_id)\n" ] }, { "cell_type": "markdown", "metadata": { "id": "KkFNBsunbZ8t" }, "source": [ "## Delete the database" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "UOYfn7bxbZVZ" }, "outputs": [], "source": [ "def delete_database(instance_id, database_id):\n", " \n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " database = instance.database(database_id)\n", " \n", " database.drop()\n", "\n", " print(\"{} database dropped\".format(database_id))\n", "\n", " \n", "\n", "# Call the function\n", "delete_database(instance_id, database_id)" ] }, { "cell_type": "markdown", "metadata": { "id": "lWXl0d_UcnU3" }, "source": [ "## Delete the instance" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "hK3BYqbxcrp3" }, "outputs": [], "source": [ "def delete_spanner_instance(instance_id):\n", " \n", " spanner_client = spanner.Client(project=project_id)\n", " instance = spanner_client.instance(instance_id)\n", " instance.delete()\n", "\n", " print(\"{} instance deleted\".format(instance_id))\n", "\n", "# Call the function\n", "delete_spanner_instance(instance_id)" ] } ], "metadata": { "colab": { "authorship_tag": "ABX9TyOb7IWMldNMS3d5dYqPFzkL", "include_colab_link": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3.8.2 64-bit", "language": "python", "name": "python3" }, "language_info": { "name": "python", "version": "3.8.2" }, "vscode": { "interpreter": { "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" } } }, "nbformat": 4, "nbformat_minor": 0 }