{
"cells": [
{
"cell_type": "markdown",
"id": "28bea13b-67bd-4a0e-8eab-3b8ffd37259e",
"metadata": {},
"source": [
"# DBMaint: Create Everything\n",
"This notebook will use the AWS boto3 APIs to create the needed resources for a dbmaint example.\n",
"\n",
"## AWS Resources Created\n",
"- Database \n",
"- Changeset to add data to database \n",
"- Scaling Group that will contain the two clusters \n",
"- Shared Volume to contain the two views (dbmaint and query) \n",
"- Dataviews: two, one for dbmaint another for query\n",
"- Clusters: two, dbmaint (GP type) and query (GP type)\n",
"\n",
"## Architecture\n",
"<img src=\"images/Deepdive Diagrams-dbmaint.drawio.png\" width=\"50%\">\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "0d5f1d4a-ed45-44e3-bf75-9bdb75fcddbb",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import os\n",
"import subprocess\n",
"import boto3\n",
"import json\n",
"import datetime\n",
"\n",
"import pykx as kx\n",
"\n",
"from env import *\n",
"from config import *\n",
"from managed_kx import *\n",
"\n",
"# set q console width and height\n",
"kx.q.system.display_size = [50, 1000]\n",
"\n",
"# ----------------------------------------------------------------\n",
"# Source data directory\n",
"SOURCE_DATA_DIR=\"hdb\"\n",
"\n",
"# Code directory\n",
"CODEBASE=\"dbmaint\"\n",
"\n",
"# S3 Destinations\n",
"S3_CODE_PATH=\"code\"\n",
"S3_DATA_PATH=\"data\"\n",
"\n",
"NODE_TYPE=\"kx.sg.xlarge\"\n",
"\n",
"MAINT_DATABASE_CONFIG=[{ \n",
" 'databaseName': DB_NAME,\n",
" 'dataviewName': MAINT_DBVIEW_NAME\n",
" }]\n",
"\n",
"QUERY_DATABASE_CONFIG=[{ \n",
" 'databaseName': DB_NAME,\n",
" 'dataviewName': QUERY_DBVIEW_NAME\n",
" }]\n",
"\n",
"CODE_CONFIG={ 's3Bucket': S3_BUCKET, 's3Key': f'{S3_CODE_PATH}/{CODEBASE}.zip' }\n",
"\n",
"NAS1_CONFIG= {\n",
" 'type': 'SSD_250',\n",
" 'size': 1200\n",
"}\n",
"\n",
"INIT_SCRIPT='init.q'\n",
"CMD_ARGS=[\n",
" { 'key': 's', 'value': '4' }, \n",
" { 'key': 'AWS_ZIP_DEFAULT', 'value': '17,2,6' },\n",
"]\n",
"\n",
"VPC_CONFIG={ \n",
" 'vpcId': VPC_ID,\n",
" 'securityGroupIds': SECURITY_GROUPS,\n",
" 'subnetIds': SUBNET_IDS,\n",
" 'ipAddressType': 'IP_V4' \n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3cfe7d89-9f5d-4ceb-ac8c-1f5054a6f15a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Using credentials and create service client\n",
"session = boto3.Session()\n",
"\n",
"# create finspace client\n",
"client = session.client(service_name='finspace')"
]
},
{
"cell_type": "markdown",
"id": "8c3d4047-9583-4b09-b75d-98fd2ddd6c36",
"metadata": {},
"source": [
"# Create the Database\n",
"Create a database from the supplied data in hdb.tar.gz. "
]
},
{
"cell_type": "markdown",
"id": "5bd8efd4-a881-4871-af85-9b6125457d06",
"metadata": {},
"source": [
"## Untar HDB Data in hdb.tar.gz\n",
"Data will be found in hdb directory"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "38188d80-dff5-4809-96de-330002688e76",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"!rm -rf hdb"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d9fab7b4-284a-472b-8089-01fa1db8ebc6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"!tar -xf hdb.tar.gz"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1ef94785-4b95-468d-a078-c1698996e8fa",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 68\n",
"drwxrwxr-x 12 ec2-user ec2-user 4096 Nov 26 14:44 .\n",
"drwxrwxr-x 7 ec2-user ec2-user 4096 Nov 26 18:43 ..\n",
"drwxrwxr-x 3 ec2-user ec2-user 4096 Nov 26 14:44 2024.11.16\n",
"drwxrwxr-x 3 ec2-user ec2-user 4096 Nov 26 14:44 2024.11.17\n",
"drwxrwxr-x 3 ec2-user ec2-user 4096 Nov 26 14:44 2024.11.18\n",
"drwxrwxr-x 3 ec2-user ec2-user 4096 Nov 26 14:44 2024.11.19\n",
"drwxrwxr-x 3 ec2-user ec2-user 4096 Nov 26 14:44 2024.11.20\n",
"drwxrwxr-x 3 ec2-user ec2-user 4096 Nov 26 14:44 2024.11.21\n",
"drwxrwxr-x 3 ec2-user ec2-user 4096 Nov 26 14:44 2024.11.22\n",
"drwxrwxr-x 3 ec2-user ec2-user 4096 Nov 26 14:44 2024.11.23\n",
"drwxrwxr-x 3 ec2-user ec2-user 4096 Nov 26 14:44 2024.11.24\n",
"drwxrwxr-x 3 ec2-user ec2-user 4096 Nov 26 14:44 2024.11.25\n",
"-rw-rw-r-- 1 ec2-user ec2-user 16392 Nov 26 14:44 sym\n"
]
}
],
"source": [
"!ls -la hdb"
]
},
{
"cell_type": "markdown",
"id": "8bf690f2-c465-4df8-90f5-1e3b808bb368",
"metadata": {},
"source": [
"## Stage HDB Data on S3\n",
"Using AWS cli, copy hdb to staging bucket"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "aca7f0d7-32cd-443b-b642-e7209b8516ef",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" PRE 2024.11.16/\n",
" PRE 2024.11.17/\n",
" PRE 2024.11.18/\n",
" PRE 2024.11.19/\n",
" PRE 2024.11.20/\n",
" PRE 2024.11.21/\n",
" PRE 2024.11.22/\n",
" PRE 2024.11.23/\n",
" PRE 2024.11.24/\n",
" PRE 2024.11.25/\n",
"2024-11-26 18:43:16 16392 sym\n"
]
},
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"S3_DEST=f\"s3://{S3_BUCKET}/{S3_DATA_PATH}/{SOURCE_DATA_DIR}/\"\n",
"\n",
"cp = \"\"\n",
"\n",
"if AWS_ACCESS_KEY_ID is not None:\n",
" cp = f\"\"\"\n",
"export AWS_ACCESS_KEY_ID={AWS_ACCESS_KEY_ID}\n",
"export AWS_SECRET_ACCESS_KEY={AWS_SECRET_ACCESS_KEY}\n",
"export AWS_SESSION_TOKEN={AWS_SESSION_TOKEN}\n",
"\"\"\"\n",
" \n",
"cp += f\"\"\"\n",
"aws s3 sync --exclude .DS_Store {SOURCE_DATA_DIR} {S3_DEST} --quiet\n",
"aws s3 ls {S3_DEST}\n",
"\"\"\"\n",
" \n",
"# execute the S3 copy\n",
"os.system(cp)"
]
},
{
"cell_type": "markdown",
"id": "17c759c4-ee6c-45c5-a9f6-6acacea3a3be",
"metadata": {},
"source": [
"## Create Managed Database\n",
"Using the AWS APIs, create a managed database in Managed kdb Insights."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d55bd8d3-5629-46f9-bc1f-47bb0308dc0a",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CREATING Database: dbmaintdb\n",
"CREATED Database: dbmaintdb\n",
"{\n",
" \"createdTimestamp\": \"2024-11-26 18:43:17.090000+00:00\",\n",
" \"databaseArn\": \"arn:aws:finspace:us-east-1:829845998889:kxEnvironment/jlcenjvtkgzrdek2qqv7ic/kxDatabase/dbmaintdb\",\n",
" \"databaseName\": \"dbmaintdb\",\n",
" \"description\": \"Basictick kdb database\",\n",
" \"environmentId\": \"jlcenjvtkgzrdek2qqv7ic\",\n",
" \"lastModifiedTimestamp\": \"2024-11-26 18:43:17.090000+00:00\"\n",
"}\n"
]
}
],
"source": [
"# assume it exists\n",
"create_db=False\n",
"\n",
"try:\n",
" resp = client.get_kx_database(environmentId=ENV_ID, databaseName=DB_NAME)\n",
" resp.pop('ResponseMetadata', None)\n",
"except:\n",
" # does not exist, will create\n",
" create_db=True\n",
"\n",
"if create_db:\n",
" print(f\"CREATING Database: {DB_NAME}\")\n",
" resp = client.create_kx_database(environmentId=ENV_ID, databaseName=DB_NAME, description=\"Basictick kdb database\")\n",
" resp.pop('ResponseMetadata', None)\n",
"\n",
" print(f\"CREATED Database: {DB_NAME}\")\n",
"\n",
"print(json.dumps(resp,sort_keys=True,indent=4,default=str))"
]
},
{
"cell_type": "markdown",
"id": "26d1194e-0c04-49a3-a7e7-a1d23fcff0d9",
"metadata": {},
"source": [
"## Add HDB Data to Database\n",
"Add the data in the local hdb directory to the managed database using the changeset mechanism. The Data will be copied to S3 then ingested with the create-kx-changeset API."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "eae61f04-1c9c-468e-bb38-b2e0b94897a0",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Changeset...\n",
"{\n",
" \"changeRequests\": [\n",
" {\n",
" \"changeType\": \"PUT\",\n",
" \"dbPath\": \"/2024.11.19/\",\n",
" \"s3Path\": \"s3://kdb-demo-829845998889-kms/data/hdb/2024.11.19/\"\n",
" },\n",
" {\n",
" \"changeType\": \"PUT\",\n",
" \"dbPath\": \"/2024.11.17/\",\n",
" \"s3Path\": \"s3://kdb-demo-829845998889-kms/data/hdb/2024.11.17/\"\n",
" },\n",
" {\n",
" \"changeType\": \"PUT\",\n",
" \"dbPath\": \"/2024.11.25/\",\n",
" \"s3Path\": \"s3://kdb-demo-829845998889-kms/data/hdb/2024.11.25/\"\n",
" },\n",
" {\n",
" \"changeType\": \"PUT\",\n",
" \"dbPath\": \"/2024.11.22/\",\n",
" \"s3Path\": \"s3://kdb-demo-829845998889-kms/data/hdb/2024.11.22/\"\n",
" },\n",
" {\n",
" \"changeType\": \"PUT\",\n",
" \"dbPath\": \"/2024.11.23/\",\n",
" \"s3Path\": \"s3://kdb-demo-829845998889-kms/data/hdb/2024.11.23/\"\n",
" },\n",
" {\n",
" \"changeType\": \"PUT\",\n",
" \"dbPath\": \"/2024.11.18/\",\n",
" \"s3Path\": \"s3://kdb-demo-829845998889-kms/data/hdb/2024.11.18/\"\n",
" },\n",
" {\n",
" \"changeType\": \"PUT\",\n",
" \"dbPath\": \"/2024.11.16/\",\n",
" \"s3Path\": \"s3://kdb-demo-829845998889-kms/data/hdb/2024.11.16/\"\n",
" },\n",
" {\n",
" \"changeType\": \"PUT\",\n",
" \"dbPath\": \"/2024.11.24/\",\n",
" \"s3Path\": \"s3://kdb-demo-829845998889-kms/data/hdb/2024.11.24/\"\n",
" },\n",
" {\n",
" \"changeType\": \"PUT\",\n",
" \"dbPath\": \"/2024.11.21/\",\n",
" \"s3Path\": \"s3://kdb-demo-829845998889-kms/data/hdb/2024.11.21/\"\n",
" },\n",
" {\n",
" \"changeType\": \"PUT\",\n",
" \"dbPath\": \"/2024.11.20/\",\n",
" \"s3Path\": \"s3://kdb-demo-829845998889-kms/data/hdb/2024.11.20/\"\n",
" },\n",
" {\n",
" \"changeType\": \"PUT\",\n",
" \"dbPath\": \"/\",\n",
" \"s3Path\": \"s3://kdb-demo-829845998889-kms/data/hdb/sym\"\n",
" }\n",
" ],\n",
" \"changesetId\": \"Lsm05H0USQBBNX8sIskNPA\",\n",
" \"createdTimestamp\": \"2024-11-26 18:43:19.210000+00:00\",\n",
" \"databaseName\": \"dbmaintdb\",\n",
" \"environmentId\": \"jlcenjvtkgzrdek2qqv7ic\",\n",
" \"lastModifiedTimestamp\": \"2024-11-26 18:43:19.210000+00:00\",\n",
" \"status\": \"PENDING\"\n",
"}\n"
]
}
],
"source": [
"# Check if there is a changeset in the database, if so, no need to add another\n",
"c_set_list = list_kx_changesets(client, environmentId=ENV_ID, databaseName=DB_NAME)\n",
"\n",
"if len(c_set_list) == 0:\n",
"\n",
" changes=[]\n",
"\n",
" for f in os.listdir(f\"{SOURCE_DATA_DIR}\"):\n",
" if os.path.isdir(f\"{SOURCE_DATA_DIR}/{f}\"):\n",
" changes.append( { 'changeType': 'PUT', 's3Path': f\"{S3_DEST}{f}/\", 'dbPath': f\"/{f}/\" } )\n",
" else:\n",
" changes.append( { 'changeType': 'PUT', 's3Path': f\"{S3_DEST}{f}\", 'dbPath': f\"/\" } )\n",
"\n",
" resp = client.create_kx_changeset(environmentId=ENV_ID, databaseName=DB_NAME, \n",
" changeRequests=changes)\n",
"\n",
" resp.pop('ResponseMetadata', None)\n",
" changeset_id = resp['changesetId']\n",
"\n",
" print(\"Changeset...\")\n",
" print(json.dumps(resp,sort_keys=True,indent=4,default=str))\n",
"else:\n",
" c_set_list=sorted(c_set_list, key=lambda d: d['createdTimestamp']) \n",
" changeset_id=c_set_list[-1]['changesetId']\n",
" print(f\"Using Last changeset: {changeset_id}\")\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "b4422bdd-7d44-4fb0-8018-0bebd6987704",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Status is IN_PROGRESS, total wait 0:00:00, waiting 10 sec ...\n",
"Status is IN_PROGRESS, total wait 0:00:10, waiting 10 sec ...\n",
"**Done**\n"
]
}
],
"source": [
"wait_for_changeset_status(client, environmentId=ENV_ID, databaseName=DB_NAME, changesetId=changeset_id, show_wait=True)\n",
"print(\"**Done**\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "8ba008f3-4991-474c-9b3e-43a1dca56257",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"====================================================================================================\n",
"Database: dbmaintdb, Changesets: 1 \n",
"====================================================================================================\n",
" Changeset: Lsm05H0USQBBNX8sIskNPA: Created: 2024-11-26 18:43:19.210000+00:00 (COMPLETED)\n"
]
},
{
"data": {
"text/html": [
"<style type=\"text/css\">\n",
"</style>\n",
"<table id=\"T_d0bcf\">\n",
" <thead>\n",
" <tr>\n",
" <th id=\"T_d0bcf_level0_col0\" class=\"col_heading level0 col0\" >changeType</th>\n",
" <th id=\"T_d0bcf_level0_col1\" class=\"col_heading level0 col1\" >s3Path</th>\n",
" <th id=\"T_d0bcf_level0_col2\" class=\"col_heading level0 col2\" >dbPath</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td id=\"T_d0bcf_row0_col0\" class=\"data row0 col0\" >PUT</td>\n",
" <td id=\"T_d0bcf_row0_col1\" class=\"data row0 col1\" >s3://kdb-demo-829845998889-kms/data/hdb/2024.11.19/</td>\n",
" <td id=\"T_d0bcf_row0_col2\" class=\"data row0 col2\" >/2024.11.19/</td>\n",
" </tr>\n",
" <tr>\n",
" <td id=\"T_d0bcf_row1_col0\" class=\"data row1 col0\" >PUT</td>\n",
" <td id=\"T_d0bcf_row1_col1\" class=\"data row1 col1\" >s3://kdb-demo-829845998889-kms/data/hdb/2024.11.17/</td>\n",
" <td id=\"T_d0bcf_row1_col2\" class=\"data row1 col2\" >/2024.11.17/</td>\n",
" </tr>\n",
" <tr>\n",
" <td id=\"T_d0bcf_row2_col0\" class=\"data row2 col0\" >PUT</td>\n",
" <td id=\"T_d0bcf_row2_col1\" class=\"data row2 col1\" >s3://kdb-demo-829845998889-kms/data/hdb/2024.11.25/</td>\n",
" <td id=\"T_d0bcf_row2_col2\" class=\"data row2 col2\" >/2024.11.25/</td>\n",
" </tr>\n",
" <tr>\n",
" <td id=\"T_d0bcf_row3_col0\" class=\"data row3 col0\" >PUT</td>\n",
" <td id=\"T_d0bcf_row3_col1\" class=\"data row3 col1\" >s3://kdb-demo-829845998889-kms/data/hdb/2024.11.22/</td>\n",
" <td id=\"T_d0bcf_row3_col2\" class=\"data row3 col2\" >/2024.11.22/</td>\n",
" </tr>\n",
" <tr>\n",
" <td id=\"T_d0bcf_row4_col0\" class=\"data row4 col0\" >PUT</td>\n",
" <td id=\"T_d0bcf_row4_col1\" class=\"data row4 col1\" >s3://kdb-demo-829845998889-kms/data/hdb/2024.11.23/</td>\n",
" <td id=\"T_d0bcf_row4_col2\" class=\"data row4 col2\" >/2024.11.23/</td>\n",
" </tr>\n",
" <tr>\n",
" <td id=\"T_d0bcf_row5_col0\" class=\"data row5 col0\" >PUT</td>\n",
" <td id=\"T_d0bcf_row5_col1\" class=\"data row5 col1\" >s3://kdb-demo-829845998889-kms/data/hdb/2024.11.18/</td>\n",
" <td id=\"T_d0bcf_row5_col2\" class=\"data row5 col2\" >/2024.11.18/</td>\n",
" </tr>\n",
" <tr>\n",
" <td id=\"T_d0bcf_row6_col0\" class=\"data row6 col0\" >PUT</td>\n",
" <td id=\"T_d0bcf_row6_col1\" class=\"data row6 col1\" >s3://kdb-demo-829845998889-kms/data/hdb/2024.11.16/</td>\n",
" <td id=\"T_d0bcf_row6_col2\" class=\"data row6 col2\" >/2024.11.16/</td>\n",
" </tr>\n",
" <tr>\n",
" <td id=\"T_d0bcf_row7_col0\" class=\"data row7 col0\" >PUT</td>\n",
" <td id=\"T_d0bcf_row7_col1\" class=\"data row7 col1\" >s3://kdb-demo-829845998889-kms/data/hdb/2024.11.24/</td>\n",
" <td id=\"T_d0bcf_row7_col2\" class=\"data row7 col2\" >/2024.11.24/</td>\n",
" </tr>\n",
" <tr>\n",
" <td id=\"T_d0bcf_row8_col0\" class=\"data row8 col0\" >PUT</td>\n",
" <td id=\"T_d0bcf_row8_col1\" class=\"data row8 col1\" >s3://kdb-demo-829845998889-kms/data/hdb/2024.11.21/</td>\n",
" <td id=\"T_d0bcf_row8_col2\" class=\"data row8 col2\" >/2024.11.21/</td>\n",
" </tr>\n",
" <tr>\n",
" <td id=\"T_d0bcf_row9_col0\" class=\"data row9 col0\" >PUT</td>\n",
" <td id=\"T_d0bcf_row9_col1\" class=\"data row9 col1\" >s3://kdb-demo-829845998889-kms/data/hdb/2024.11.20/</td>\n",
" <td id=\"T_d0bcf_row9_col2\" class=\"data row9 col2\" >/2024.11.20/</td>\n",
" </tr>\n",
" <tr>\n",
" <td id=\"T_d0bcf_row10_col0\" class=\"data row10 col0\" >PUT</td>\n",
" <td id=\"T_d0bcf_row10_col1\" class=\"data row10 col1\" >s3://kdb-demo-829845998889-kms/data/hdb/sym</td>\n",
" <td id=\"T_d0bcf_row10_col2\" class=\"data row10 col2\" >/</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x7f46f5eba680>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"note_str = \"\"\n",
"\n",
"c_set_list = list_kx_changesets(client, environmentId=ENV_ID, databaseName=DB_NAME)\n",
"\n",
"if len(c_set_list) == 0:\n",
" note_str = \"<<Could not get changesets>>\"\n",
"\n",
"print(100*\"=\")\n",
"print(f\"Database: {DB_NAME}, Changesets: {len(c_set_list)} {note_str}\")\n",
"print(100*\"=\")\n",
"\n",
"# sort by create time\n",
"c_set_list = sorted(c_set_list, key=lambda d: d['createdTimestamp']) \n",
"\n",
"for c in c_set_list:\n",
" c_set_id = c['changesetId']\n",
" print(f\" Changeset: {c_set_id}: Created: {c['createdTimestamp']} ({c['status']})\")\n",
" c_rqs = client.get_kx_changeset(environmentId=ENV_ID, databaseName=DB_NAME, changesetId=c_set_id)['changeRequests']\n",
"\n",
" chs_pdf = pd.DataFrame.from_dict(c_rqs).style.hide(axis='index')\n",
" display(chs_pdf)"
]
},
{
"cell_type": "markdown",
"id": "9dae0232-3666-491f-8891-dae30e12c9d8",
"metadata": {},
"source": [
"# Create Scaling Group\n",
"The scaling group represents the total compute avilable to the application. All clusters will be placed into the scaling group ans share the compute and memory of the scaling group."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "613be7f8-fb82-4415-b30c-186ed470dba4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Check if scaling group exits, only create if it does not\n",
"resp = get_kx_scaling_group(client=client, environmentId=ENV_ID, scalingGroupName=SCALING_GROUP_NAME)\n",
"\n",
"if resp is None:\n",
" resp = client.create_kx_scaling_group(\n",
" environmentId = ENV_ID, \n",
" scalingGroupName = SCALING_GROUP_NAME,\n",
" hostType=NODE_TYPE,\n",
" availabilityZoneId = AZ_ID\n",
" )\n",
"else:\n",
" print(f\"Scaling Group {SCALING_GROUP_NAME} exists\")"
]
},
{
"cell_type": "markdown",
"id": "6943fb16-8989-4199-a0fd-5c7c0d5aa56e",
"metadata": {},
"source": [
"# Create Shared Volume\n",
"The shared volume is a common storage device for the application. Every cluster using the shared volume will have a writable directory named after the cluster, can read the directories named after other clusters in the application using the volume. Also, there is a common "
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "e4a8a247-d029-4f9b-aaf5-c6e2ffe200a1",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Check if volume already exists before trying to create one\n",
"resp = get_kx_volume(client=client, environmentId=ENV_ID, volumeName=VOLUME_NAME)\n",
"\n",
"if resp is None:\n",
" resp = client.create_kx_volume(\n",
" environmentId = ENV_ID, \n",
" volumeType = 'NAS_1',\n",
" volumeName = VOLUME_NAME,\n",
" description = 'Shared volume',\n",
" nas1Configuration = NAS1_CONFIG,\n",
" azMode='SINGLE',\n",
" availabilityZoneIds=[ AZ_ID ] \n",
" )\n",
"else:\n",
" print(f\"Volume {VOLUME_NAME} exists\") "
]
},
{
"cell_type": "markdown",
"id": "9e718537-8853-4f21-8cc7-36b489e380c4",
"metadata": {},
"source": [
"# Wait for Volume and Scaling Group\n",
"Before proceeding to use Volumes and Scaling groups, wait for their creation to complete."
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "a3ed8931-e458-4ffb-83cc-0aa4da4d9f98",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Scaling Group: SCALING_GROUP_dbmaint status is CREATING, total wait 0:00:00, waiting 30 sec ...\n",
"Scaling Group: SCALING_GROUP_dbmaint status is CREATING, total wait 0:00:30, waiting 30 sec ...\n",
"Scaling Group: SCALING_GROUP_dbmaint status is CREATING, total wait 0:01:00, waiting 30 sec ...\n",
"Scaling Group: SCALING_GROUP_dbmaint status is CREATING, total wait 0:01:30, waiting 30 sec ...\n",
"Scaling Group: SCALING_GROUP_dbmaint status is CREATING, total wait 0:02:00, waiting 30 sec ...\n",
"Scaling Group: SCALING_GROUP_dbmaint status is CREATING, total wait 0:02:30, waiting 30 sec ...\n",
"Scaling Group: SCALING_GROUP_dbmaint status is CREATING, total wait 0:03:00, waiting 30 sec ...\n",
"Scaling Group: SCALING_GROUP_dbmaint status is CREATING, total wait 0:03:30, waiting 30 sec ...\n",
"Scaling Group: SCALING_GROUP_dbmaint status is CREATING, total wait 0:04:00, waiting 30 sec ...\n",
"Scaling Group: SCALING_GROUP_dbmaint status is CREATING, total wait 0:04:30, waiting 30 sec ...\n",
"Scaling Group: SCALING_GROUP_dbmaint status is now ACTIVE, total wait 0:05:00\n",
"** DONE **\n",
"Volume: DBMAINT_VOLUME status is CREATING, total wait 0:00:00, waiting 30 sec ...\n",
"Volume: DBMAINT_VOLUME status is CREATING, total wait 0:00:30, waiting 30 sec ...\n",
"Volume: DBMAINT_VOLUME status is CREATING, total wait 0:01:00, waiting 30 sec ...\n",
"Volume: DBMAINT_VOLUME status is CREATING, total wait 0:01:30, waiting 30 sec ...\n",
"Volume: DBMAINT_VOLUME status is CREATING, total wait 0:02:00, waiting 30 sec ...\n",
"Volume: DBMAINT_VOLUME status is now ACTIVE, total wait 0:02:30\n",
"** DONE **\n"
]
}
],
"source": [
"# wait for the scaling group to create\n",
"wait_for_scaling_group_status(client=client, environmentId=ENV_ID, scalingGroupName=SCALING_GROUP_NAME, show_wait=True)\n",
"print(\"** DONE **\")\n",
"\n",
"# wait for the volume to create\n",
"wait_for_volume_status(client=client, environmentId=ENV_ID, volumeName=VOLUME_NAME, show_wait=True)\n",
"print(\"** DONE **\")"
]
},
{
"cell_type": "markdown",
"id": "fe41eaeb-9c8e-44d3-b8bc-f354142f9140",
"metadata": {},
"source": [
"# Create Dataviews\n",
"Create dataviews, for a specific (static) version of the database and have all of its data cached using the shared volume.\n",
"\n",
"By not giving changesetId to the create function, the latest changesetID of the database will be used for the view."
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "03434316-4ccc-420d-adee-715e6eb1bcd6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Check if dataview already exists and is set to the requested changeset_id\n",
"resp = get_kx_dataview(client=client, environmentId=ENV_ID, databaseName=DB_NAME, dataviewName=MAINT_DBVIEW_NAME)\n",
"\n",
"if resp is None:\n",
" # sort by create time\n",
" c_set_list = sorted(c_set_list, key=lambda d: d['createdTimestamp']) \n",
"\n",
" resp = client.create_kx_dataview(\n",
" environmentId = ENV_ID, \n",
" databaseName=DB_NAME, \n",
" dataviewName=MAINT_DBVIEW_NAME,\n",
" azMode='SINGLE',\n",
" availabilityZoneId=AZ_ID,\n",
" changesetId=c_set_list[-1]['changesetId'],\n",
" segmentConfigurations=[\n",
" { \n",
" 'dbPaths': ['/*'],\n",
" 'volumeName': VOLUME_NAME,\n",
" 'onDemand': True,\n",
" }\n",
" ],\n",
" autoUpdate=False,\n",
" readWrite=True,\n",
" description = f'Dataview of database {DB_NAME}'\n",
" )\n",
"else:\n",
" print(f\"Dataview {MAINT_DBVIEW_NAME} exists\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "501cc060-4eff-41b0-8980-c2e7d8eea858",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Check if dataview already exists and is set to the requested changeset_id\n",
"resp = get_kx_dataview(client=client, environmentId=ENV_ID, databaseName=DB_NAME, dataviewName=QUERY_DBVIEW_NAME)\n",
"\n",
"if resp is None:\n",
" # sort by create time\n",
" c_set_list = sorted(c_set_list, key=lambda d: d['createdTimestamp']) \n",
"\n",
" resp = client.create_kx_dataview(\n",
" environmentId = ENV_ID, \n",
" databaseName=DB_NAME, \n",
" dataviewName=QUERY_DBVIEW_NAME,\n",
" azMode='SINGLE',\n",
" availabilityZoneId=AZ_ID,\n",
" segmentConfigurations=[\n",
" { \n",
" 'dbPaths': ['/*'],\n",
" 'volumeName': VOLUME_NAME,\n",
" }\n",
" ],\n",
" autoUpdate=False,\n",
" description = f'Dataview of database {DB_NAME}'\n",
" )\n",
"else:\n",
" print(f\"Dataview {QUERY_DBVIEW_NAME} exists\")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "734ab7da-ef78-4701-9a58-1eeacbd9c557",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataview: dbmaintdb_DBVIEW_MAINT status is CREATING, total wait 0:00:00, waiting 30 sec ...\n",
"Dataview: dbmaintdb_DBVIEW_MAINT status is CREATING, total wait 0:00:30, waiting 30 sec ...\n",
"Dataview: dbmaintdb_DBVIEW_MAINT status is CREATING, total wait 0:01:00, waiting 30 sec ...\n",
"Dataview: dbmaintdb_DBVIEW_MAINT status is CREATING, total wait 0:01:30, waiting 30 sec ...\n",
"Dataview: dbmaintdb_DBVIEW_MAINT status is CREATING, total wait 0:02:00, waiting 30 sec ...\n",
"Dataview: dbmaintdb_DBVIEW_MAINT status is CREATING, total wait 0:02:30, waiting 30 sec ...\n",
"Dataview: dbmaintdb_DBVIEW_MAINT status is CREATING, total wait 0:03:00, waiting 30 sec ...\n",
"Dataview: dbmaintdb_DBVIEW_MAINT status is now ACTIVE, total wait 0:03:30\n",
"Dataview: dbmaintdb_DBVIEW_QUERY status is CREATING, total wait 0:00:00, waiting 30 sec ...\n",
"Dataview: dbmaintdb_DBVIEW_QUERY status is CREATING, total wait 0:00:30, waiting 30 sec ...\n",
"Dataview: dbmaintdb_DBVIEW_QUERY status is CREATING, total wait 0:01:00, waiting 30 sec ...\n",
"Dataview: dbmaintdb_DBVIEW_QUERY status is CREATING, total wait 0:01:30, waiting 30 sec ...\n",
"Dataview: dbmaintdb_DBVIEW_QUERY status is CREATING, total wait 0:02:00, waiting 30 sec ...\n",
"Dataview: dbmaintdb_DBVIEW_QUERY status is CREATING, total wait 0:02:30, waiting 30 sec ...\n",
"Dataview: dbmaintdb_DBVIEW_QUERY status is now ACTIVE, total wait 0:03:00\n",
"** DONE **\n"
]
}
],
"source": [
"# wait for the view to create\n",
"for v in all_views:\n",
" wait_for_dataview_status(client=client, environmentId=ENV_ID, databaseName=DB_NAME, dataviewName=v, show_wait=True)\n",
"print(\"** DONE **\")"
]
},
{
"cell_type": "markdown",
"id": "dea431b0-c501-46bb-b72a-a5eb80a335b0",
"metadata": {},
"source": [
"# Create Clusters\n",
"With foundational resources now completed, create the needed clusters for the application."
]
},
{
"cell_type": "markdown",
"id": "0f0c06ab-4dcb-4cc6-abc9-2c77ff3a4242",
"metadata": {},
"source": [
"## Stage Code to S3\n",
"Code to be used in this application must be staged to an S3 bucket the service can read from, that code will then be deployed to the clusters as part of their creation workflow."
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "b502a0a5-8610-4fc8-b6b7-04c47e89ba75",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"updating: initdb.q (deflated 22%)\n",
"updating: dbmaint.q (deflated 66%)\n",
"updating: init.q (deflated 13%)\n",
"upload: ./dbmaint.zip to s3://kdb-demo-829845998889-kms/code/dbmaint.zip\n",
"2023-06-05 21:25:21 0 \n",
"2024-11-26 15:24:58 16585 basictick.zip\n",
"2024-11-15 18:24:29 1184 bmll.zip\n",
"2024-11-26 15:26:06 455 code.zip\n",
"2023-12-21 19:47:37 574 codebundle.zip\n",
"2024-02-02 21:34:56 582 codebundle1.zip\n",
"2023-12-21 21:26:00 582 codebundle2.zip\n",
"2024-11-26 18:58:02 2607 dbmaint.zip\n",
"2024-09-04 17:42:17 556 foo.q.zip\n",
"2023-11-22 14:58:53 1530 jpmc_code.zip\n",
"2024-01-01 19:57:08 33781 kdb-tick-flat-largetable.zip\n",
"2023-12-30 22:56:33 38867 kdb-tick-flat.zip\n",
"2024-01-08 13:05:33 28741 kdb-tick.zip\n",
"2023-08-22 16:58:18 765 qcode.zip\n",
"2024-10-16 22:31:45 465 taqcode.zip\n",
"2024-04-26 16:38:46 487423 torq_app.zip\n",
"2024-03-06 19:01:11 5807282 torq_app_20240306_1901.zip\n",
"2024-03-06 19:13:22 5807290 torq_app_20240306_1913.zip\n",
"2024-03-13 15:57:24 5807307 torq_app_20240313_1557.zip\n",
"2024-03-13 18:16:01 5807310 torq_app_20240313_1815.zip\n",
"2024-03-14 16:03:45 5807310 torq_app_20240314_1603.zip\n",
"2024-03-15 16:59:48 5807310 torq_app_20240315_1659.zip\n",
"2024-03-18 20:09:13 8925181 torq_app_20240318_2009.zip\n",
"2024-01-30 16:52:19 3583 tradeplus.zip\n"
]
},
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# zip the code\n",
"os.system(f\"cd {CODEBASE}; zip -r -X ../{CODEBASE}.zip . -x '*.ipynb_checkpoints*';\")\n",
"\n",
"cp = \"\"\n",
"\n",
"# copy code to S3\n",
"if AWS_ACCESS_KEY_ID is not None:\n",
" cp = f\"\"\"\n",
"export AWS_ACCESS_KEY_ID={AWS_ACCESS_KEY_ID}\n",
"export AWS_SECRET_ACCESS_KEY={AWS_SECRET_ACCESS_KEY}\n",
"export AWS_SESSION_TOKEN={AWS_SESSION_TOKEN}\n",
"\"\"\"\n",
"\n",
"cp += f\"\"\"\n",
"aws s3 cp --exclude .DS_Store {CODEBASE}.zip s3://{S3_BUCKET}/code/{CODEBASE}.zip\n",
"aws s3 ls s3://{S3_BUCKET}/code/\n",
"\"\"\"\n",
"\n",
"# execute the S3 copy\n",
"os.system(cp)"
]
},
{
"cell_type": "markdown",
"id": "4a2a31ee-07e1-408b-8fe9-2a9fb92f4df2",
"metadata": {},
"source": [
"## Create Clusters\n",
"\n",
"Create the cluster for performing dbmaint and another to use for queries."
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "3abd68fa-5690-4374-bb68-4277bb87cf26",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# cluster already exists?\n",
"resp = get_kx_cluster(client, environmentId=ENV_ID, clusterName=MAINT_CLUSTER_NAME)\n",
"\n",
"if resp is None:\n",
" resp = client.create_kx_cluster(\n",
" environmentId=ENV_ID, \n",
" clusterName=MAINT_CLUSTER_NAME,\n",
" clusterType='GP',\n",
" releaseLabel = '1.0',\n",
" executionRole=EXECUTION_ROLE,\n",
" databases=MAINT_DATABASE_CONFIG,\n",
" scalingGroupConfiguration={\n",
" 'memoryReservation': 6,\n",
" 'nodeCount': 1,\n",
" 'scalingGroupName': SCALING_GROUP_NAME,\n",
" },\n",
" clusterDescription=f\"{MAINT_CLUSTER_NAME} cluster created with create_all notebook\",\n",
" code=CODE_CONFIG,\n",
" initializationScript=INIT_SCRIPT,\n",
" commandLineArguments=CMD_ARGS,\n",
" azMode=AZ_MODE,\n",
" availabilityZoneId=AZ_ID,\n",
" vpcConfiguration=VPC_CONFIG\n",
" )\n",
"else:\n",
" print(f\"Cluster: {MAINT_CLUSTER_NAME} already exists\")\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "a3c2cb99-46d9-4e7d-b2b5-9a79399c4168",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# cluster already exists?\n",
"resp = get_kx_cluster(client, environmentId=ENV_ID, clusterName=QUERY_CLUSTER_NAME)\n",
"\n",
"if resp is None:\n",
" resp = client.create_kx_cluster(\n",
" environmentId=ENV_ID, \n",
" clusterName=QUERY_CLUSTER_NAME,\n",
" clusterType='GP',\n",
" releaseLabel = '1.0',\n",
" executionRole=EXECUTION_ROLE,\n",
" databases=QUERY_DATABASE_CONFIG,\n",
" scalingGroupConfiguration={\n",
" 'memoryReservation': 6,\n",
" 'nodeCount': 1,\n",
" 'scalingGroupName': SCALING_GROUP_NAME,\n",
" },\n",
" clusterDescription=f\"{QUERY_CLUSTER_NAME} cluster created with create_all notebook\",\n",
" code=CODE_CONFIG,\n",
" initializationScript=\"initdb.q\",\n",
" commandLineArguments=CMD_ARGS,\n",
" azMode=AZ_MODE,\n",
" availabilityZoneId=AZ_ID,\n",
" vpcConfiguration=VPC_CONFIG\n",
" )\n",
"else:\n",
" print(f\"Cluster: {QUERY_CLUSTER_NAME} already exists\")\n"
]
},
{
"cell_type": "markdown",
"id": "ec822269-6c16-4ae9-9116-fdb4a048682b",
"metadata": {},
"source": [
"### Wait for Cluster to Create"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "8efea149-22e9-4503-9c31-903b742a77eb",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cluster: dbmaint_cluster_maint status is PENDING, total wait 0:00:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:00:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:01:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:01:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:02:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:02:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:03:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:03:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:04:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:04:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:05:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:05:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:06:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:06:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:07:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:07:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:08:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:08:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:09:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:09:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:10:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:10:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:11:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:11:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:12:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:12:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:13:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:13:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:14:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:14:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:15:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is CREATING, total wait 0:15:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_maint status is now RUNNING, total wait 0:16:00\n",
"Cluster: dbmaint_cluster_query status is CREATING, total wait 0:00:00, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_query status is CREATING, total wait 0:00:30, waiting 30 sec ...\n",
"Cluster: dbmaint_cluster_query status is now RUNNING, total wait 0:01:00\n",
"** ALL DONE **\n"
]
}
],
"source": [
"for c in all_clusters:\n",
" wait_for_cluster_status(client, environmentId=ENV_ID, clusterName=c, show_wait=True)\n",
"print(\"** ALL DONE **\")"
]
},
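{
"cell_type": "markdown",
"id": "b7e4c1a2-3f5d-4a8e-9c21-6d0f2b7a9e13",
"metadata": {},
"source": [
"### Check Cluster Status (optional)\n",
"A minimal sketch that re-checks each created cluster, assuming the get_kx_cluster helper from managed_kx returns the cluster description dictionary (including a status field) or None when the cluster does not exist."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a9d3f6c0-1b7e-4c52-8e4a-2f9d0c6b5a71",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Hedged sketch: summarize the status of the clusters created above.\n",
"# Assumes get_kx_cluster returns the cluster description dict (with a 'status' field) or None.\n",
"for c in all_clusters:\n",
"    c_resp = get_kx_cluster(client, environmentId=ENV_ID, clusterName=c)\n",
"    status = c_resp.get('status', 'UNKNOWN') if c_resp is not None else 'NOT FOUND'\n",
"    print(f\"Cluster: {c}: {status}\")"
]
},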
{
"cell_type": "markdown",
"id": "9e91a23b-b100-4763-9c50-c819f5824202",
"metadata": {},
"source": [
"# All Processes Running"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "86f33240-bb12-49f3-8d9c-5783c25eb182",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Last Run: 2024-11-26 19:15:18.773515\n"
]
}
],
"source": [
"print( f\"Last Run: {datetime.datetime.now()}\" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4522c408-df94-4d9c-8bd1-2f91ead485cb",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "conda_python3",
"language": "python",
"name": "conda_python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}