ManagedkdbInsights/boto/pykx_query_welcomedb.ipynb (374 lines of code) (raw):
{
"cells": [
{
"cell_type": "markdown",
"id": "1334d92e-34f4-448c-b6af-73cba89e9d6d",
"metadata": {},
"source": [
"# PyKX Query Welcome Database\n",
"This notebook demonstrates querying the welcomedb database using PyKX."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "11e9b62a-9cda-4a57-99ca-3b13aff6b7f3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import warnings\n",
"warnings.simplefilter(action='ignore', category=FutureWarning)\n",
"\n",
"import os\n",
"import boto3\n",
"import json\n",
"import datetime\n",
"\n",
"import pykx as kx\n",
"\n",
"from managed_kx import *\n",
"from env import *\n",
"\n",
"CLUSTER_NAME=\"cluster_welcomedb\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cb429a9f-e8a4-41d2-8572-c4e8a49ad387",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Using credentials and create service client\n",
"session = boto3.Session()\n",
"\n",
"# create finspace client\n",
"client = session.client(service_name='finspace')"
]
},
{
"cell_type": "markdown",
"id": "32c26a3e-e51e-4981-951b-efca08e20e9a",
"metadata": {},
"source": [
"# Connect to Cluster"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "125c2a8e-1880-4043-9230-1089778e5bf8",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Query the HDB\n",
"hdb = get_pykx_connection(client, \n",
" environmentId=ENV_ID, clusterName=CLUSTER_NAME, \n",
" userName=KDB_USERNAME, boto_session=session)"
]
},
{
"cell_type": "markdown",
"id": "96df5426-05fd-442c-b8f6-8cb336abe085",
"metadata": {},
"source": [
"# Table Schema"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a0e9e88b-2c48-4c12-9f68-6be7119bd5e3",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>t</th>\n",
" <th>f</th>\n",
" <th>a</th>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>date</th>\n",
" <td>b'd'</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>sym</th>\n",
" <td>b's'</td>\n",
" <td></td>\n",
" <td>p</td>\n",
" </tr>\n",
" <tr>\n",
" <th>time</th>\n",
" <td>b'p'</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>number</th>\n",
" <td>b'j'</td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" t f a\n",
"c \n",
"date b'd' \n",
"sym b's' p\n",
"time b'p' \n",
"number b'j' "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"schema_pdf = hdb(\"meta(`example)\").pd()\n",
"\n",
"display(schema_pdf)"
]
},
{
"cell_type": "markdown",
"id": "d797007e-355b-4fee-9c6e-a11273d27d76",
"metadata": {},
"source": [
"# Query Table"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f6107bbb-e8fd-46df-97a1-6033d628f38b",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>counts</th>\n",
" <th>avg_num</th>\n",
" <th>avg_sq_num</th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2024-11-16</th>\n",
" <td>1000000</td>\n",
" <td>499800.464405</td>\n",
" <td>3.331963e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-11-17</th>\n",
" <td>1000000</td>\n",
" <td>499865.008159</td>\n",
" <td>3.332692e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-11-18</th>\n",
" <td>1000000</td>\n",
" <td>499912.379127</td>\n",
" <td>3.332060e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-11-19</th>\n",
" <td>1000000</td>\n",
" <td>500078.393386</td>\n",
" <td>3.334004e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-11-20</th>\n",
" <td>1000000</td>\n",
" <td>500264.684412</td>\n",
" <td>3.336606e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-11-21</th>\n",
" <td>1000000</td>\n",
" <td>499849.962912</td>\n",
" <td>3.333436e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-11-22</th>\n",
" <td>1000000</td>\n",
" <td>500082.257829</td>\n",
" <td>3.334472e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-11-23</th>\n",
" <td>1000000</td>\n",
" <td>500169.492354</td>\n",
" <td>3.336027e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-11-24</th>\n",
" <td>1000000</td>\n",
" <td>499903.047577</td>\n",
" <td>3.331833e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2024-11-25</th>\n",
" <td>1000000</td>\n",
" <td>500277.492790</td>\n",
" <td>3.335924e+11</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" counts avg_num avg_sq_num\n",
"date \n",
"2024-11-16 1000000 499800.464405 3.331963e+11\n",
"2024-11-17 1000000 499865.008159 3.332692e+11\n",
"2024-11-18 1000000 499912.379127 3.332060e+11\n",
"2024-11-19 1000000 500078.393386 3.334004e+11\n",
"2024-11-20 1000000 500264.684412 3.336606e+11\n",
"2024-11-21 1000000 499849.962912 3.333436e+11\n",
"2024-11-22 1000000 500082.257829 3.334472e+11\n",
"2024-11-23 1000000 500169.492354 3.336027e+11\n",
"2024-11-24 1000000 499903.047577 3.331833e+11\n",
"2024-11-25 1000000 500277.492790 3.335924e+11"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rows: 10,000,000\n"
]
}
],
"source": [
"# Dates, counts, and analytics\n",
"hdb_pdf = hdb(\"select counts:count i, avg_num: avg number, avg_sq_num: avg sq number by date from example\").pd()\n",
"\n",
"# Number of Rows\n",
"hdb_rows = hdb(\"count example\").py()\n",
"\n",
"display(hdb_pdf)\n",
"\n",
"# Number of Rows\n",
"print(f\"Rows: {hdb_rows:,}\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "70e7ef98-be09-4d93-ae90-2e957732de4e",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Last Run: 2024-11-26 15:26:43.710405\n"
]
}
],
"source": [
"print( f\"Last Run: {datetime.datetime.now()}\" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f00c649f-6cb9-4723-8cea-e34b0efa76e6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "conda_python3",
"language": "python",
"name": "conda_python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}