courses/understanding_spanner/colab-notebooks/Spanner_Queries_Transactions.ipynb (813 lines of code) (raw):
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "yir3_hhI6El2"
},
"source": [
"## Install the Spanner Python API "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "zIAMqF3FV2qQ"
},
"outputs": [],
"source": [
"! pip install --upgrade google-cloud-spanner"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "FLsnZvxW6YN3"
},
"source": [
"## Set the following variables \n",
"\n",
"You at least need to change the Project ID variable to your project's ID. You can change the other variables if you like. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Fvl1JzxrWkl3"
},
"outputs": [],
"source": [
"project_id = 'your-project-id-here'\n",
"instance_id = 'my-spanner-instance'\n",
"processing_units = 100\n",
"database_id = 'pets-db'\n",
"\n",
"OPERATION_TIMEOUT_SECONDS = 240\n",
"\n",
"!gcloud services enable spanner.googleapis.com \n",
"print(\"Spanner Enabled\")\n",
"\n",
"print(\"Done\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-VjmkR1Y6VMX"
},
"source": [
"## Create a Spanner instance, the Pets database, and add some test records "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "yqVnvI12XKAq"
},
"outputs": [],
"source": [
"import time\n",
"from google.cloud import spanner\n",
"import uuid\n",
"\n",
"def create_instance_with_processing_units(instance_id, processing_units):\n",
" spanner_client = spanner.Client(project=project_id)\n",
" config_name = \"{}/instanceConfigs/regional-us-central1\".format(\n",
" spanner_client.project_name\n",
" )\n",
"\n",
" instance = spanner_client.instance(\n",
" instance_id,\n",
" configuration_name=config_name,\n",
" display_name=\"spanner-instance-python\",\n",
" processing_units=processing_units,\n",
" labels={\n",
" \"created\": str(int(time.time())),\n",
" },\n",
" )\n",
"\n",
" operation = instance.create()\n",
"\n",
" print(\"Waiting for instance to be created...\")\n",
" operation.result(OPERATION_TIMEOUT_SECONDS)\n",
"\n",
" print(\n",
" \"Created instance {} with {} processing units\".format(\n",
" instance_id, instance.processing_units\n",
" )\n",
" )\n",
"\n",
"def create_database(instance_id, database_id):\n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
"\n",
" database = instance.database(\n",
" database_id,\n",
" ddl_statements=[\n",
" \"\"\"CREATE TABLE Owners (\n",
" OwnerID STRING(36) NOT NULL,\n",
" OwnerName STRING(MAX) NOT NULL\n",
" ) PRIMARY KEY (OwnerID)\"\"\",\n",
" \"\"\"CREATE TABLE Pets (\n",
" OwnerID STRING(36) NOT NULL, \n",
" PetID STRING(MAX) NOT NULL, \n",
" PetType STRING(MAX) NOT NULL,\n",
" PetName STRING(MAX) NOT NULL,\n",
" Breed STRING(MAX) NOT NULL,\n",
" ) PRIMARY KEY (OwnerID,PetID),\n",
" INTERLEAVE IN PARENT Owners ON DELETE CASCADE\"\"\",\n",
" \"\"\"CREATE INDEX PetsByOwnerID ON Pets(OwnerID)\"\"\",\n",
" \"\"\"CREATE INDEX PetsByType ON Pets(PetType)\"\"\",\n",
" \"\"\"CREATE INDEX OwnersByName ON Owners(OwnerName)\"\"\",\n",
" ],\n",
" )\n",
"\n",
" operation = database.create()\n",
"\n",
" print(\"Waiting for database to be created...\")\n",
" operation.result(OPERATION_TIMEOUT_SECONDS)\n",
"\n",
" print(\"Created database {} on instance {}\".format(database_id, instance_id))\n",
"\n",
"\n",
"def insert_data(instance_id, database_id):\n",
" doug_id = str(uuid.uuid4())\n",
" john_id = str(uuid.uuid4())\n",
" sue_id = str(uuid.uuid4())\n",
"\n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" database = instance.database(database_id)\n",
"\n",
" with database.batch() as batch:\n",
" batch.insert(\n",
" table=\"Owners\",\n",
" columns=(\"OwnerID\", \"OwnerName\"),\n",
" values=[\n",
" (doug_id, u\"Doug\"),\n",
" (john_id, u\"John\"),\n",
" (sue_id, u\"Sue\"),\n",
" ],\n",
" )\n",
"\n",
" batch.insert(\n",
" table=\"Pets\",\n",
" columns=(\"PetID\", \"OwnerID\", \"PetType\", \"PetName\", \"Breed\"),\n",
" values=[\n",
" (str(uuid.uuid4()), doug_id, u\"Dog\", u\"Noir\", u\"Schnoodle\"),\n",
" (str(uuid.uuid4()), doug_id, u\"Dog\", u\"Bree\", u\"Mutt\"),\n",
" (str(uuid.uuid4()), doug_id, u\"Cat\", u\"Tom\", u\"Alley\"),\n",
" (str(uuid.uuid4()), john_id, u\"Dog\", u\"Duke\", u\"GoldenDoodle\"),\\\n",
" (str(uuid.uuid4()), john_id, u\"Dog\", u\"Sparky\", u\"Poodle\"),\n",
" (str(uuid.uuid4()), john_id, u\"Turtle\", u\"Cuff\", u\"Box\"),\n",
" (str(uuid.uuid4()), john_id, u\"Turtle\", u\"Link\", u\"Box\"),\n",
" (str(uuid.uuid4()), sue_id, u\"Cat\", u\"Cleo\", u\"Domestic\"),\n",
" ],\n",
" )\n",
"\n",
" print(\"Inserted data.\")\n",
"\n",
"\n",
"# Call the functions defined above\n",
"create_instance_with_processing_units(instance_id, processing_units)\n",
"create_database(instance_id, database_id)\n",
"insert_data(instance_id, database_id)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "GDM4oSmb7WiY"
},
"source": [
"## Run a query using SQL"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "jM79QuodVlF2"
},
"outputs": [],
"source": [
"def run_simple_query(sql):\n",
" # Instantiate a client.\n",
" spanner_client = spanner.Client(project=project_id)\n",
"\n",
" # Get a Cloud Spanner instance by ID.\n",
" instance = spanner_client.instance(instance_id)\n",
"\n",
" # Get a Cloud Spanner database by ID.\n",
" database = instance.database(database_id)\n",
"\n",
" # Execute a simple SQL statement.\n",
" with database.snapshot() as snapshot:\n",
" results = snapshot.execute_sql(sql)\n",
" for row in results:\n",
" print(row)\n",
"\n",
"sql = \"\"\"SELECT OwnerName, PetName, PetType, Breed \n",
" FROM Owners \n",
" JOIN Pets ON Owners.OwnerID = Pets.OwnerID;\"\"\"\n",
"\n",
"\n",
"run_simple_query(sql)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "tIaIEp0k-luw"
},
"source": [
"## Read data from a table"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "5rqQrEcL980A"
},
"outputs": [],
"source": [
"def read_data(instance_id, database_id):\n",
" \"\"\"Reads sample data from the database.\"\"\"\n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" database = instance.database(database_id)\n",
"\n",
" with database.snapshot() as snapshot:\n",
" keyset = spanner.KeySet(all_=True)\n",
" results = snapshot.read(\n",
" table=\"Pets\", columns=(\"PetID\", \"PetName\", \"PetType\", \"Breed\"), keyset=keyset\n",
" )\n",
"\n",
" for row in results:\n",
" print(u\"PetID: {}, PetName: {}, PetType: {}, Breed: {}\".format(*row))\n",
"\n",
"\n",
"# Call the function\n",
"read_data(instance_id, database_id)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UNONRfo1-0_k"
},
"source": [
"## Stale read\n",
"\n",
"Reads sample data from the database. The data is exactly 15 seconds stale."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xX-l9U8I-vu1"
},
"outputs": [],
"source": [
"def read_stale_data(instance_id, database_id):\n",
"\n",
" import datetime\n",
"\n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" database = instance.database(database_id)\n",
"\n",
" # Everything is the same as the prior example except staleness\n",
" staleness = datetime.timedelta(seconds=15)\n",
" with database.snapshot(exact_staleness=staleness) as snapshot:\n",
" keyset = spanner.KeySet(all_=True)\n",
" results = snapshot.read(\n",
" table=\"Pets\", columns=(\"PetID\", \"PetName\", \"PetType\", \"Breed\"), keyset=keyset,\n",
" )\n",
"\n",
" for row in results:\n",
" print(u\"PetID: {}, PetName: {}, PetType: {}, Breed: {}\".format(*row))\n",
"\n",
"# Call the function\n",
"read_stale_data(instance_id, database_id)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "rAk-dCtZ_9bB"
},
"source": [
"## Run query using index with parameters\n",
"\n",
"Most queries require one or more parameters. See the code below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "KkikPKzQ_7oQ"
},
"outputs": [],
"source": [
"def query_data_with_index(instance_id, database_id, owner_name=\"Doug\"):\n",
"\n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" database = instance.database(database_id)\n",
"\n",
" # Define the parameters and their data types \n",
" params = {\"owner_name\": owner_name}\n",
" param_types = {\n",
" \"owner_name\": spanner.param_types.STRING,\n",
" }\n",
"\n",
" # Note, the parameter added to the query: @owner_name\n",
" # and the parameters added to the execuste_sql function\n",
" with database.snapshot() as snapshot:\n",
" results = snapshot.execute_sql(\n",
" \"\"\"SELECT OwnerName, PetName, PetType, Breed \n",
" FROM Owners \n",
" JOIN Pets ON Owners.OwnerID = Pets.OwnerID \n",
" WHERE OwnerName = @owner_name\"\"\"\n",
" \"\",\n",
" params=params,\n",
" param_types=param_types,\n",
" )\n",
"\n",
" for row in results:\n",
" print(row)\n",
"\n",
"query_data_with_index(instance_id, database_id, \"John\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "coBd8Nu5F6uq"
},
"source": [
"## Run query using index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "KTaMUdYoFdel"
},
"outputs": [],
"source": [
"def get_pets_by_type(instance_id, database_id, pet_type=\"Dog\"):\n",
"\n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" database = instance.database(database_id)\n",
"\n",
" params = {\"pet_type\": pet_type}\n",
" param_types = {\n",
" \"pet_type\": spanner.param_types.STRING,\n",
" }\n",
" # You need to tell Spanner to use an index. \n",
" # That is a little different than what you may be accustomed to. \n",
" with database.snapshot() as snapshot:\n",
" results = snapshot.execute_sql(\n",
" \"\"\"SELECT *\n",
" FROM Pets@{FORCE_INDEX=PetsByType} \n",
" WHERE PetType = @pet_type\"\"\"\n",
" \"\",\n",
" params=params,\n",
" param_types=param_types,\n",
" )\n",
"\n",
" for row in results:\n",
" print(row)\n",
"\n",
"get_pets_by_type(instance_id, database_id, pet_type=\"Dog\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "5dLgdXasGcTl"
},
"source": [
"## Read data with an index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "5vpqL0cCGX14"
},
"outputs": [],
"source": [
"def read_owners_with_index(instance_id, database_id):\n",
" \n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" database = instance.database(database_id)\n",
"\n",
" # Similar to the previous example, but using the read function. \n",
" # Specify the index as a parameter to the runction. \n",
" with database.snapshot() as snapshot:\n",
" keyset = spanner.KeySet(all_=True)\n",
" results = snapshot.read(\n",
" table=\"Owners\", \n",
" columns=(\"OwnerName\",), \n",
" keyset=keyset,\n",
" index=\"OwnersByName\",\n",
" )\n",
"\n",
" for row in results:\n",
" print(row)\n",
"\n",
"\n",
"read_owners_with_index(instance_id, database_id)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ytSK-3PWTOrz"
},
"source": [
"## Add an index\n",
"\n",
"You don't need to interrupt the database in order to make a schema change. In the example below, an index is added using a DDL statement. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "RqaDGQQ9TSmC"
},
"outputs": [],
"source": [
"def add_index(instance_id, database_id):\n",
" \"\"\"Adds a simple index to the example database.\"\"\"\n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" database = instance.database(database_id)\n",
"\n",
" # Add an index using a DDL SQL statement\n",
" operation = database.update_ddl(\n",
" [\"CREATE INDEX PetsByTypeBreedName ON Pets(PetType,Breed,PetName)\"]\n",
" )\n",
"\n",
" print(\"Waiting for operation to complete...\")\n",
" operation.result(OPERATION_TIMEOUT_SECONDS)\n",
"\n",
" print(\"Added the PetsByTypeBreedName index.\")\n",
"\n",
"# Call the function\n",
"add_index(instance_id, database_id)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "5Y51OmRVUGoO"
},
"source": [
"## Read with index just created\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "_QTAQW7TUGy4"
},
"outputs": [],
"source": [
"def read_pets_with_index(instance_id, database_id):\n",
" \n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" database = instance.database(database_id)\n",
"\n",
" with database.snapshot() as snapshot:\n",
" keyset = spanner.KeySet(all_=True)\n",
" results = snapshot.read(\n",
" table=\"Pets\", \n",
" columns=(\"PetType\",\"Breed\",\"PetName\"), \n",
" keyset=keyset,\n",
" index=\"PetsByTypeBreedName\",\n",
" )\n",
"\n",
" for row in results:\n",
" print(row)\n",
"\n",
"# Call the function\n",
"read_pets_with_index(instance_id, database_id)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "C8slDHRSVygU"
},
"source": [
"## Insert some records using a transaction\n",
"\n",
"The example below is simple, but demonstrates how to run a transaction. \n",
"\n",
"\n",
"Notice the use of a closure in Python. That is, a function within a function. In the inner function, the INSERT statement is run using the transaction that is created in the outer function. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "BFOmTalRVyrx"
},
"outputs": [],
"source": [
"import uuid\n",
"\n",
"def insert_owner(instance_id, database_id, owner_id, owner_name):\n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" database = instance.database(database_id)\n",
"\n",
" def insert_owners(transaction, owner_id, owner_name):\n",
" params = {\"owner_id\": owner_id, \n",
" \"owner_name\": owner_name,\n",
" }\n",
" param_types = {\n",
" \"owner_id\": spanner.param_types.STRING,\n",
" \"owner_name\": spanner.param_types.STRING,\n",
" } \n",
" \n",
" # The transaction is passed as a argument. \n",
" # Use the execute_update method when updating the data. \n",
" row_ct = transaction.execute_update(\n",
" \"\"\"INSERT Owners (OwnerID, OwnerName)\n",
" VALUES (@owner_id, @owner_name)\"\"\",\n",
" params=params,\n",
" param_types=param_types,\n",
" )\n",
" print(\"{} record(s) inserted.\".format(row_ct))\n",
"\n",
" # Exectute the run_in_transaction function, passing a pointer\n",
" # to the function that updates the data, along with the data required. \n",
" database.run_in_transaction(insert_owners, owner_id, owner_name)\n",
"\n",
"\n",
"\n",
"# Call the function\n",
"insert_owner(instance_id, database_id, str(uuid.uuid4()), \"Dave\" )\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Wl5bXrwVYwa8"
},
"source": [
"## Run update query with partitioned DML\n",
"\n",
"This is the second type of transaction that Spanner supports. It is designed for bulk changes to the data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WQutbU8N_goO"
},
"outputs": [],
"source": [
"def update_data_with_dml(instance_id, database_id):\n",
" \n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" database = instance.database(database_id)\n",
"\n",
" row_ct = database.execute_partitioned_dml(\n",
" \"\"\"UPDATE Pets SET PetType = 'Canine'\n",
" WHERE PetType = 'Dog'\"\"\")\n",
" \n",
" print(\"{} record(s) updated.\".format(row_ct))\n",
"\n",
"\n",
"# Call the function\n",
"update_data_with_dml(instance_id, database_id)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "tiokdY7YZfnA"
},
"source": [
"## Check changes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-k9FzD27Zfwt"
},
"outputs": [],
"source": [
"def run_query(sql):\n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" database = instance.database(database_id)\n",
"\n",
" # Execute a simple SQL statement.\n",
" with database.snapshot() as snapshot:\n",
" results = snapshot.execute_sql(sql)\n",
" for row in results:\n",
" print(row)\n",
"\n",
"\n",
"sql = \"\"\"SELECT OwnerName, PetName, PetType, Breed \n",
" FROM Owners \n",
" LEFT JOIN Pets ON Owners.OwnerID = Pets.OwnerID;\"\"\"\n",
"\n",
"run_query(sql)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "12hVh5AMGHtT"
},
"source": [
"## Run read-only transaction\n",
"\n",
"Read-only transactions are the third type of transaction supported by Spanner. Because both reads are using the same snapshot, they are guaranteed to be executed at the same timestamp. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "UOzuVOZkIXXj"
},
"outputs": [],
"source": [
"def read_only_transaction(instance_id, database_id):\n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" database = instance.database(database_id)\n",
"\n",
" # Both reads use the same snapshot, so are executed at the same timestamp\n",
" with database.snapshot(multi_use=True) as snapshot:\n",
" # First Read.\n",
" results = snapshot.execute_sql(\"SELECT OwnerName FROM Owners\")\n",
"\n",
" print(\"Results from first read:\")\n",
" for row in results:\n",
" print(row)\n",
"\n",
" #Second Read\n",
" results = snapshot.execute_sql(\"SELECT PetName, PetType, Breed FROM Pets\")\n",
"\n",
" print(\"Results from second read:\")\n",
" for row in results:\n",
" print(row)\n",
"\n",
"\n",
"read_only_transaction(instance_id, database_id)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_AVdvElrZ-aG"
},
"source": [
"## Delete all of the data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "SImpKl-3aCD9"
},
"outputs": [],
"source": [
"def delete_data_with_dml(instance_id, database_id):\n",
" \n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" database = instance.database(database_id)\n",
"\n",
" def delete_pets_owners(transaction):\n",
" row_ct = transaction.execute_update(\n",
" \"DELETE FROM Pets WHERE true = true\"\n",
" )\n",
" row_ct += transaction.execute_update(\n",
" \"DELETE FROM Owners WHERE true = true\"\n",
" )\n",
"\n",
" print(\"{} record(s) deleted.\".format(row_ct))\n",
"\n",
" database.run_in_transaction(delete_pets_owners)\n",
"\n",
"# Call the function\n",
"delete_data_with_dml(instance_id, database_id)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KkFNBsunbZ8t"
},
"source": [
"## Delete the database"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "UOYfn7bxbZVZ"
},
"outputs": [],
"source": [
"def delete_database(instance_id, database_id):\n",
" \n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" database = instance.database(database_id)\n",
" \n",
" database.drop()\n",
"\n",
" print(\"{} database dropped\".format(database_id))\n",
"\n",
" \n",
"\n",
"# Call the function\n",
"delete_database(instance_id, database_id)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "lWXl0d_UcnU3"
},
"source": [
"## Delete the instance"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "hK3BYqbxcrp3"
},
"outputs": [],
"source": [
"def delete_spanner_instance(instance_id):\n",
" \n",
" spanner_client = spanner.Client(project=project_id)\n",
" instance = spanner_client.instance(instance_id)\n",
" instance.delete()\n",
"\n",
" print(\"{} instance deleted\".format(instance_id))\n",
"\n",
"# Call the function\n",
"delete_spanner_instance(instance_id)"
]
}
],
"metadata": {
"colab": {
"authorship_tag": "ABX9TyOb7IWMldNMS3d5dYqPFzkL",
"include_colab_link": true,
"provenance": []
},
"kernelspec": {
"display_name": "Python 3.8.2 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.8.2"
},
"vscode": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}