ManagedkdbInsights/boto/pykx_query_welcomedb.ipynb (374 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "id": "1334d92e-34f4-448c-b6af-73cba89e9d6d", "metadata": {}, "source": [ "# PyKX Query Welcome Database\n", "This notebook demonstrates querying the welcomedb database using PyKX." ] }, { "cell_type": "code", "execution_count": 1, "id": "11e9b62a-9cda-4a57-99ca-3b13aff6b7f3", "metadata": { "tags": [] }, "outputs": [], "source": [ "import warnings\n", "warnings.simplefilter(action='ignore', category=FutureWarning)\n", "\n", "import os\n", "import boto3\n", "import json\n", "import datetime\n", "\n", "import pykx as kx\n", "\n", "from managed_kx import *\n", "from env import *\n", "\n", "CLUSTER_NAME=\"cluster_welcomedb\"" ] }, { "cell_type": "code", "execution_count": 2, "id": "cb429a9f-e8a4-41d2-8572-c4e8a49ad387", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Using credentials and create service client\n", "session = boto3.Session()\n", "\n", "# create finspace client\n", "client = session.client(service_name='finspace')" ] }, { "cell_type": "markdown", "id": "32c26a3e-e51e-4981-951b-efca08e20e9a", "metadata": {}, "source": [ "# Connect to Cluster" ] }, { "cell_type": "code", "execution_count": 3, "id": "125c2a8e-1880-4043-9230-1089778e5bf8", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Query the HDB\n", "hdb = get_pykx_connection(client, \n", " environmentId=ENV_ID, clusterName=CLUSTER_NAME, \n", " userName=KDB_USERNAME, boto_session=session)" ] }, { "cell_type": "markdown", "id": "96df5426-05fd-442c-b8f6-8cb336abe085", "metadata": {}, "source": [ "# Table Schema" ] }, { "cell_type": "code", "execution_count": 4, "id": "a0e9e88b-2c48-4c12-9f68-6be7119bd5e3", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>t</th>\n", " <th>f</th>\n", " <th>a</th>\n", " </tr>\n", " <tr>\n", " <th>c</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>date</th>\n", " <td>b'd'</td>\n", " <td></td>\n", " <td></td>\n", " </tr>\n", " <tr>\n", " <th>sym</th>\n", " <td>b's'</td>\n", " <td></td>\n", " <td>p</td>\n", " </tr>\n", " <tr>\n", " <th>time</th>\n", " <td>b'p'</td>\n", " <td></td>\n", " <td></td>\n", " </tr>\n", " <tr>\n", " <th>number</th>\n", " <td>b'j'</td>\n", " <td></td>\n", " <td></td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " t f a\n", "c \n", "date b'd' \n", "sym b's' p\n", "time b'p' \n", "number b'j' " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "schema_pdf = hdb(\"meta(`example)\").pd()\n", "\n", "display(schema_pdf)" ] }, { "cell_type": "markdown", "id": "d797007e-355b-4fee-9c6e-a11273d27d76", "metadata": {}, "source": [ "# Query Table" ] }, { "cell_type": "code", "execution_count": 5, "id": "f6107bbb-e8fd-46df-97a1-6033d628f38b", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>counts</th>\n", " <th>avg_num</th>\n", " <th>avg_sq_num</th>\n", " </tr>\n", " <tr>\n", " <th>date</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>2024-11-16</th>\n", " <td>1000000</td>\n", " <td>499800.464405</td>\n", " <td>3.331963e+11</td>\n", " </tr>\n", " <tr>\n", " <th>2024-11-17</th>\n", " <td>1000000</td>\n", " <td>499865.008159</td>\n", " <td>3.332692e+11</td>\n", " </tr>\n", " <tr>\n", " <th>2024-11-18</th>\n", " <td>1000000</td>\n", " <td>499912.379127</td>\n", " <td>3.332060e+11</td>\n", " </tr>\n", " <tr>\n", " <th>2024-11-19</th>\n", " <td>1000000</td>\n", " <td>500078.393386</td>\n", " <td>3.334004e+11</td>\n", " </tr>\n", " <tr>\n", " <th>2024-11-20</th>\n", " <td>1000000</td>\n", " <td>500264.684412</td>\n", " <td>3.336606e+11</td>\n", " </tr>\n", " <tr>\n", " <th>2024-11-21</th>\n", " <td>1000000</td>\n", " <td>499849.962912</td>\n", " <td>3.333436e+11</td>\n", " </tr>\n", " <tr>\n", " <th>2024-11-22</th>\n", " <td>1000000</td>\n", " <td>500082.257829</td>\n", " <td>3.334472e+11</td>\n", " </tr>\n", " <tr>\n", " <th>2024-11-23</th>\n", " <td>1000000</td>\n", " <td>500169.492354</td>\n", " <td>3.336027e+11</td>\n", " </tr>\n", " <tr>\n", " <th>2024-11-24</th>\n", " <td>1000000</td>\n", " <td>499903.047577</td>\n", " <td>3.331833e+11</td>\n", " </tr>\n", " <tr>\n", " <th>2024-11-25</th>\n", " <td>1000000</td>\n", " <td>500277.492790</td>\n", " <td>3.335924e+11</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " counts avg_num avg_sq_num\n", "date \n", "2024-11-16 1000000 499800.464405 3.331963e+11\n", "2024-11-17 1000000 499865.008159 3.332692e+11\n", "2024-11-18 1000000 499912.379127 3.332060e+11\n", "2024-11-19 1000000 500078.393386 3.334004e+11\n", "2024-11-20 1000000 500264.684412 3.336606e+11\n", "2024-11-21 1000000 499849.962912 3.333436e+11\n", "2024-11-22 1000000 500082.257829 3.334472e+11\n", "2024-11-23 1000000 500169.492354 3.336027e+11\n", "2024-11-24 1000000 499903.047577 3.331833e+11\n", "2024-11-25 1000000 500277.492790 3.335924e+11" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Rows: 10,000,000\n" ] } ], "source": [ "# Dates, counts, and analytics\n", "hdb_pdf = hdb(\"select counts:count i, avg_num: avg number, avg_sq_num: avg sq number by date from example\").pd()\n", "\n", "# Number of Rows\n", "hdb_rows = hdb(\"count example\").py()\n", "\n", "display(hdb_pdf)\n", "\n", "# Number of Rows\n", "print(f\"Rows: {hdb_rows:,}\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "70e7ef98-be09-4d93-ae90-2e957732de4e", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Last Run: 2024-11-26 15:26:43.710405\n" ] } ], "source": [ "print( f\"Last Run: {datetime.datetime.now()}\" )" ] }, { "cell_type": "code", "execution_count": null, "id": "f00c649f-6cb9-4723-8cea-e34b0efa76e6", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "conda_python3", "language": "python", "name": "conda_python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.15" } }, "nbformat": 4, "nbformat_minor": 5 }