ManagedkdbInsights/boto/r_query_welcomedb.ipynb (269 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "id": "b059d8b4-3abd-4654-8bf7-d0f8b5143730", "metadata": {}, "source": [ "# Query Data with R\n", "This notebook shows how to connect to a FinSpace Managed kdb cluster using the [rkdb](https://github.com/KxSystems/rkdb) library.\n", "\n", "## Setup\n", "Update [env.r](env.r) with you environment's information (github has an empty example).\n", "\n", "## References\n", "- [rkdb](https://github.com/KxSystems/rkdb) library (github)\n", "- [paws](https://cran.r-project.org/web/packages/paws/index.html) library (CRAN) \n", "- [nanotime](https://cran.r-project.org/web/packages/nanotime/index.html) library (CRAN) " ] }, { "cell_type": "code", "execution_count": 1, "id": "9fab5941-5fc1-420c-ba21-b78e7af11751", "metadata": {}, "outputs": [], "source": [ "#' Test if package is already installed\n", "#' \n", "#' @param mypkg package name to check\n", "#' @returns TRUE: if package is installed, else FALSE\n", "#'\n", "is.installed <- function(mypkg) {\n", " is.element(mypkg, installed.packages()[,1]) \n", "}\n", "\n", "#' Install package if not installed\n", "#' \n", "#' @param x package name to confirm installation\n", "#' @returns NULL\n", "#'\n", "pkgTest <- function(x) {\n", " if ( !is.installed(x) ) {\n", " install.packages(x, dep=TRUE)\n", " } \n", " return(NULL)\n", "}\n", "\n", "#' Get a connection to a FinSpace managed kdb cluster\n", "#' \n", "#' @param environment_id FinSpace environment ID\n", "#' @param cluster_name FinSpace cluster name to connect to\n", "#' @param user_name FinSpace user connecting to cluster \n", "#' @returns A connection to the requested cluster\n", "#'\n", "get_connection <- function(environment_id, cluster_name, user_name) {\n", " # create finspace service client (uses default credentials)\n", " finspace <- paws::finspace()\n", " \n", " # Call service to get information about the FinSpace user\n", " user_resp <- finspace$get_kx_user(\n", " environmentId = environment_id,\n", " userName = user_name\n", " )\n", " \n", " # get the connection string to the cluster for the finspace user, passing the userARN\n", " conn_str <- finspace$get_kx_connection_string(\n", " environmentId = environment_id, \n", " userArn = user_resp$userArn, \n", " clusterName = cluster_name\n", " )\n", " \n", " # Break into parts for connection function call\n", " split_df <- str_split(conn_str$signedConnectionString, \":\")\n", " \n", " # split the connection string\n", " conn_parts <- split_df[[1]]\n", " \n", " # get out the bits\n", " host<-gsub('/', '', conn_parts[3])\n", " port<-conn_parts[4]\n", " user<-conn_parts[5]\n", " pass<-conn_parts[6]\n", " \n", " # username password are packed together for connection call\n", " userpass<-paste(user, pass, sep=\":\")\n", "\n", " return(open_connection(host, port, userpass))\n", "}" ] }, { "cell_type": "code", "execution_count": 2, "id": "9eead7ab-69ce-4ac6-81ea-1b5afc08b54d", "metadata": { "tags": [] }, "outputs": [], "source": [ "# ensure needed packages are installed\n", "for (p in c(\"paws\", \"nanotime\", \"stringr\", \"devtools\") ) {\n", " pkgTest(p)\n", "}\n", "\n", "# rkdb is not on CRAN, installing different (uses devtools)\n", "if ( !is.installed(\"rkdb\") ) {\n", " devtools::install_github('KxSystems/rkdb')\n", "} " ] }, { "cell_type": "code", "execution_count": 3, "id": "f18ca78a-660c-40fb-911b-69a873dafd2f", "metadata": {}, "outputs": [], "source": [ "library(rkdb)\n", "library(nanotime)\n", "library(stringr)\n", "\n", "# Source FinSpace environment information\n", "source(\"env.r\")\n", "\n", "# Cluster to connect to, created by welcome notebook: welcome.ipynb\n", "CLUSTER_NAME<-\"HDB_welcomedb\"" ] }, { "cell_type": "code", "execution_count": 4, "id": "4da309b6-8b4b-4ae3-beed-1fd23a090b0b", "metadata": {}, "outputs": [], "source": [ "# connect to cluster\n", "conn <- get_connection(ENV_ID, CLUSTER_NAME, KDB_USERNAME)" ] }, { "cell_type": "code", "execution_count": 5, "id": "09f2ff5e-c20e-407c-a3a3-3b9ce08d634a", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "'example'" ], "text/latex": [ "'example'" ], "text/markdown": [ "'example'" ], "text/plain": [ "[1] \"example\"" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "## Available Tables in FinSpace Cluster\n", "execute(conn, 'tables[]')" ] }, { "cell_type": "code", "execution_count": 6, "id": "eb639f4d-a4c6-4274-8401-4ab0a59dcdbf", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "<table class=\"dataframe\">\n", "<caption>A data.frame: 5 × 4</caption>\n", "<thead>\n", "\t<tr><th scope=col>date</th><th scope=col>sym</th><th scope=col>time</th><th scope=col>number</th></tr>\n", "\t<tr><th scope=col>&lt;date&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;nanotime&gt;</th><th scope=col>&lt;int64&gt;</th></tr>\n", "</thead>\n", "<tbody>\n", "\t<tr><td>2024-11-16</td><td>aaa</td><td>2024-11-16T14:44:14.737+00:00</td><td> 53231</td></tr>\n", "\t<tr><td>2024-11-16</td><td>aaa</td><td>2024-11-16T14:44:14.737+00:00</td><td>153560</td></tr>\n", "\t<tr><td>2024-11-16</td><td>aaa</td><td>2024-11-16T14:44:14.737+00:00</td><td>449428</td></tr>\n", "\t<tr><td>2024-11-16</td><td>aaa</td><td>2024-11-16T14:44:14.737+00:00</td><td>631966</td></tr>\n", "\t<tr><td>2024-11-16</td><td>aaa</td><td>2024-11-16T14:44:14.737+00:00</td><td>941566</td></tr>\n", "</tbody>\n", "</table>\n" ], "text/latex": [ "A data.frame: 5 × 4\n", "\\begin{tabular}{llll}\n", " date & sym & time & number\\\\\n", " <date> & <chr> & <nanotime> & <int64>\\\\\n", "\\hline\n", "\t 2024-11-16 & aaa & 2024-11-16T14:44:14.737+00:00 & 53231\\\\\n", "\t 2024-11-16 & aaa & 2024-11-16T14:44:14.737+00:00 & 153560\\\\\n", "\t 2024-11-16 & aaa & 2024-11-16T14:44:14.737+00:00 & 449428\\\\\n", "\t 2024-11-16 & aaa & 2024-11-16T14:44:14.737+00:00 & 631966\\\\\n", "\t 2024-11-16 & aaa & 2024-11-16T14:44:14.737+00:00 & 941566\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 5 × 4\n", "\n", "| date &lt;date&gt; | sym &lt;chr&gt; | time &lt;nanotime&gt; | number &lt;int64&gt; |\n", "|---|---|---|---|\n", "| 2024-11-16 | aaa | 2024-11-16T14:44:14.737+00:00 | 53231 |\n", "| 2024-11-16 | aaa | 2024-11-16T14:44:14.737+00:00 | 153560 |\n", "| 2024-11-16 | aaa | 2024-11-16T14:44:14.737+00:00 | 449428 |\n", "| 2024-11-16 | aaa | 2024-11-16T14:44:14.737+00:00 | 631966 |\n", "| 2024-11-16 | aaa | 2024-11-16T14:44:14.737+00:00 | 941566 |\n", "\n" ], "text/plain": [ " date sym time number\n", "1 2024-11-16 aaa 2024-11-16T14:44:14.737+00:00 53231\n", "2 2024-11-16 aaa 2024-11-16T14:44:14.737+00:00 153560\n", "3 2024-11-16 aaa 2024-11-16T14:44:14.737+00:00 449428\n", "4 2024-11-16 aaa 2024-11-16T14:44:14.737+00:00 631966\n", "5 2024-11-16 aaa 2024-11-16T14:44:14.737+00:00 941566" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "## Sample Records from Quote table\n", "\n", "execute(conn, 'select from example where date=min date, i<5')" ] }, { "cell_type": "code", "execution_count": null, "id": "aab2349a-dded-408f-bd44-fbfdc1e801bd", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "4.4.2" } }, "nbformat": 4, "nbformat_minor": 5 }